From 17a0d0956795b1805814066f861a37ea1a607d46 Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Tue, 18 May 2021 18:09:46 +0100 Subject: Cleanup usage of static, extern and typedef (#1256) * Cleanup usage of static, extern and typedef Remove static on functions defined in headers, as it can result in duplication in binaries. Remove unnecessary extern keyword on a function declaration, as it is the default behavior and can be puzzling when reading the code. Remove the unused declaration of my_ilogb, which is never defined. Remove unnecessary usage of typedef, as they are only increasing the cognitive load of the code for no purpose. Signed-off-by: Marco Antognini * Improve usage of inline and static in harness Functions declared in header as static can trigger unused warnings when (indirectly) included in translation units that do not use such functions. Use inline instead, which also avoids duplicating symbols in binaries. Signed-off-by: Marco Antognini --- test_common/harness/alloc.h | 4 +-- test_common/harness/fpcontrol.h | 6 ++-- test_conformance/math_brute_force/function_list.h | 16 +++++------ test_conformance/math_brute_force/main.cpp | 1 - .../math_brute_force/reference_math.cpp | 16 +++++------ test_conformance/math_brute_force/utility.h | 33 +++++++++++----------- 6 files changed, 37 insertions(+), 39 deletions(-) diff --git a/test_common/harness/alloc.h b/test_common/harness/alloc.h index 653dde05..3b00d7c9 100644 --- a/test_common/harness/alloc.h +++ b/test_common/harness/alloc.h @@ -29,7 +29,7 @@ #include "mingw_compat.h" #endif -static void* align_malloc(size_t size, size_t alignment) +inline void* align_malloc(size_t size, size_t alignment) { #if defined(_WIN32) && defined(_MSC_VER) return _aligned_malloc(size, alignment); @@ -53,7 +53,7 @@ #endif } -static void align_free(void* ptr) +inline void align_free(void* ptr) { #if defined(_WIN32) && defined(_MSC_VER) _aligned_free(ptr); diff --git 
a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index 40826c5c..9f065044 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -39,7 +39,7 @@ typedef int FPU_mode_type; extern __thread fpu_control_t fpu_control; #endif // Set the reference hardware floating point unit to FTZ mode -static inline void ForceFTZ(FPU_mode_type *mode) +inline void ForceFTZ(FPU_mode_type *mode) { #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) @@ -65,7 +65,7 @@ static inline void ForceFTZ(FPU_mode_type *mode) } // Disable the denorm flush to zero -static inline void DisableFTZ(FPU_mode_type *mode) +inline void DisableFTZ(FPU_mode_type *mode) { #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) @@ -91,7 +91,7 @@ static inline void DisableFTZ(FPU_mode_type *mode) } // Restore the reference hardware to floating point state indicated by *mode -static inline void RestoreFPState(FPU_mode_type *mode) +inline void RestoreFPState(FPU_mode_type *mode) { #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) diff --git a/test_conformance/math_brute_force/function_list.h b/test_conformance/math_brute_force/function_list.h index 38f739ce..95a29459 100644 --- a/test_conformance/math_brute_force/function_list.h +++ b/test_conformance/math_brute_force/function_list.h @@ -30,7 +30,7 @@ #include "harness/mt19937.h" -typedef union fptr { +union fptr { void *p; double (*f_f)(double); double (*f_u)(cl_uint); @@ -45,9 +45,9 @@ typedef union fptr { double (*f_ffpI)(double, double, int *); double (*f_fff)(double, double, double); float (*f_fma)(float, float, float, int); -} fptr; +}; -typedef union dptr { +union dptr { void *p; long double (*f_f)(long double); long double (*f_u)(cl_ulong); @@ -59,20 +59,20 @@ typedef union dptr { long double (*f_fpI)(long double, int *); long double (*f_ffpI)(long double, long double, int *); long double 
(*f_fff)(long double, long double, long double); -} dptr; +}; struct Func; -typedef struct vtbl +struct vtbl { const char *type_name; int (*TestFunc)(const struct Func *, MTdata, bool); int (*DoubleTestFunc)( const struct Func *, MTdata, bool); // may be NULL if function is single precision only -} vtbl; +}; -typedef struct Func +struct Func { const char *name; // common name, to be used as an argument in the shell const char *nameInCode; // name as it appears in the __kernel, usually the @@ -88,7 +88,7 @@ typedef struct Func int ftz; int relaxed; const vtbl *vtbl_ptr; -} Func; +}; extern const Func functionList[]; diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index d6c2f11f..e52f2f0a 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -167,7 +167,6 @@ static int doTest(const char *name) } { - extern int my_ilogb(double); if (0 == strcmp("ilogb", func_data->name)) { InitILogbConstants(); diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index 3a6516ba..0b037e01 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -41,10 +41,10 @@ #pragma STDC FP_CONTRACT OFF static void __log2_ep(double *hi, double *lo, double x); -typedef union { +union uint64d_t { uint64_t i; double d; -} uint64d_t; +}; static const uint64d_t _CL_NAN = { 0x7ff8000000000000ULL }; @@ -2259,10 +2259,10 @@ long double reference_dividel(long double x, long double y) return dx / dy; } -typedef struct +struct double_double { double hi, lo; -} double_double; +}; // Split doubles_double into a series of consecutive 26-bit precise doubles and // a remainder. 
Note for later -- for multiplication, it might be better to @@ -3767,10 +3767,10 @@ static uint32_t two_over_pi[] = { static uint32_t pi_over_two[] = { 0x1, 0x2487ed51, 0x42d1846, 0x26263314, 0x1701b839, 0x28948127 }; -typedef union { +union d_ui64_t { uint64_t u; double d; -} d_ui64_t; +}; // radix or base of representation #define RADIX (30) @@ -3786,13 +3786,13 @@ d_ui64_t two_pow_two_mradix = { (uint64_t)(1023 - 2 * RADIX) << 52 }; // extended fixed point representation of double precision // floating point number. // x = sign * [ sum_{i = 0 to 2} ( X[i] * 2^(index - i)*RADIX ) ] -typedef struct +struct eprep_t { uint32_t X[3]; // three 32 bit integers are sufficient to represnt double in // base_30 int index; // exponent bias int sign; // sign of double -} eprep_t; +}; static eprep_t double_to_eprep(double x) { diff --git a/test_conformance/math_brute_force/utility.h b/test_conformance/math_brute_force/utility.h index ac4db9c8..b4a59edb 100644 --- a/test_conformance/math_brute_force/utility.h +++ b/test_conformance/math_brute_force/utility.h @@ -90,8 +90,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name, bool relaxedMode); // used to convert a bucket of bits into a search pattern through double -static inline double DoubleFromUInt32(uint32_t bits); -static inline double DoubleFromUInt32(uint32_t bits) +inline double DoubleFromUInt32(uint32_t bits) { union { uint64_t u; @@ -117,25 +116,25 @@ void _LogBuildError(cl_program p, int line, const char *file); // premature flushing to zero. // However, to avoid conflict for 1.0, we are letting results at TYPE_MIN + // ulp_limit to be flushed to zero. 
-static inline int IsFloatResultSubnormal(double x, float ulps) +inline int IsFloatResultSubnormal(double x, float ulps) { x = fabs(x) - MAKE_HEX_DOUBLE(0x1.0p-149, 0x1, -149) * (double)ulps; return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126); } -static inline int IsFloatResultSubnormalAbsError(double x, float abs_err) +inline int IsFloatResultSubnormalAbsError(double x, float abs_err) { x = x - abs_err; return x < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126); } -static inline int IsDoubleResultSubnormal(long double x, float ulps) +inline int IsDoubleResultSubnormal(long double x, float ulps) { x = fabsl(x) - MAKE_HEX_LONG(0x1.0p-1074, 0x1, -1074) * (long double)ulps; return x < MAKE_HEX_LONG(0x1.0p-1022, 0x1, -1022); } -static inline int IsFloatInfinity(double x) +inline int IsFloatInfinity(double x) { union { cl_float d; @@ -145,7 +144,7 @@ static inline int IsFloatInfinity(double x) return ((u.u & 0x7fffffffU) == 0x7F800000U); } -static inline int IsFloatMaxFloat(double x) +inline int IsFloatMaxFloat(double x) { union { cl_float d; @@ -155,7 +154,7 @@ static inline int IsFloatMaxFloat(double x) return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU); } -static inline int IsFloatNaN(double x) +inline int IsFloatNaN(double x) { union { cl_float d; @@ -165,13 +164,13 @@ static inline int IsFloatNaN(double x) return ((u.u & 0x7fffffffU) > 0x7F800000U); } -extern cl_uint RoundUpToNextPowerOfTwo(cl_uint x); +cl_uint RoundUpToNextPowerOfTwo(cl_uint x); // Windows (since long double got deprecated) sets the x87 to 53-bit precision // (that's x87 default state). This causes problems with the tests that // convert long and ulong to float and double or otherwise deal with values // that need more precision than 53-bit. So, set the x87 to 64-bit precision. 
-static inline void Force64BitFPUPrecision(void) +inline void Force64BitFPUPrecision(void) { #if __MINGW32__ // The usual method is to use _controlfp as follows: @@ -202,17 +201,17 @@ static inline void Force64BitFPUPrecision(void) #endif } -extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes); +void memset_pattern4(void *dest, const void *src_pattern, size_t bytes); -typedef union { +union int32f_t { int32_t i; float f; -} int32f_t; +}; -typedef union { +union int64d_t { int64_t l; double d; -} int64d_t; +}; void MulD(double *rhi, double *rlo, double u, double v); void AddD(double *rhi, double *rlo, double a, double b); @@ -229,7 +228,7 @@ void logFunctionInfo(const char *fname, unsigned int float_size, float getAllowedUlpError(const Func *f, const bool relaxed); -static inline cl_uint getTestScale(size_t typeSize) +inline cl_uint getTestScale(size_t typeSize) { if (gWimpyMode) { @@ -245,7 +244,7 @@ static inline cl_uint getTestScale(size_t typeSize) } } -static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize) +inline uint64_t getTestStep(size_t typeSize, size_t bufferSize) { if (gWimpyMode) { -- cgit v1.2.3 From 6c8045911ab193143eae48eef68fc966d0d96b1f Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 18 May 2021 11:10:24 -0600 Subject: gles: Fix compile warnings. (#1070) * gles: Fix compile warnings. For 32 and 64-bit Visual Studio and the Android Q NDK. 
* Fix formatting violations Co-authored-by: spauls --- CMakeLists.txt | 4 - test_common/CMakeLists.txt | 5 +- test_common/gles/helpers.cpp | 6 +- test_common/gles/helpers.h | 5 +- test_common/harness/ThreadPool.cpp | 11 ++- test_common/harness/compat.h | 4 +- test_common/harness/conversions.cpp | 4 +- test_common/harness/errorHelpers.cpp | 6 +- test_common/harness/errorHelpers.h | 5 -- test_common/harness/fpcontrol.h | 8 +- test_common/harness/imageHelpers.cpp | 128 +++++++++++++++------------- test_common/harness/kernelHelpers.cpp | 4 +- test_common/harness/os_helpers.cpp | 3 +- test_common/harness/propertyHelpers.cpp | 11 +-- test_common/harness/rounding_mode.cpp | 10 +-- test_common/harness/rounding_mode.h | 2 - test_common/harness/threadTesting.cpp | 98 --------------------- test_common/harness/threadTesting.h | 5 +- test_conformance/gles/CMakeLists.txt | 8 ++ test_conformance/gles/setup_egl.cpp | 5 +- test_conformance/gles/test_fence_sync.cpp | 10 ++- test_conformance/gles/test_images_2D.cpp | 2 + test_conformance/gles/test_renderbuffer.cpp | 2 + 23 files changed, 136 insertions(+), 210 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 083ea96d..5b1f48fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -152,10 +152,6 @@ if(LINK_PTHREAD) list(APPEND CLConform_LIBRARIES pthread) endif() -if(DEFINED USE_GLES3) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGLES3") -endif() - if(APPLE) find_library(corefoundation CoreFoundation) find_library(iokit IOKit) diff --git a/test_common/CMakeLists.txt b/test_common/CMakeLists.txt index 2d4bc190..61580300 100644 --- a/test_common/CMakeLists.txt +++ b/test_common/CMakeLists.txt @@ -1,6 +1,5 @@ set(HARNESS_SOURCES - harness/threadTesting.cpp harness/typeWrappers.cpp harness/mt19937.cpp harness/conversions.cpp @@ -23,3 +22,7 @@ set(HARNESS_SOURCES add_library(harness STATIC ${HARNESS_SOURCES}) +if(MSVC) + # Don't warn about using the portable "strdup" function. 
+ target_compile_definitions(harness PRIVATE _CRT_NONSTDC_NO_DEPRECATE) +endif() \ No newline at end of file diff --git a/test_common/gles/helpers.cpp b/test_common/gles/helpers.cpp index 34f40b4c..57a4ddc1 100644 --- a/test_common/gles/helpers.cpp +++ b/test_common/gles/helpers.cpp @@ -22,7 +22,7 @@ {GLint __error = glGetError(); if(__error) {log_error( "GL ERROR: %s!\n", gluErrorString( err ));}} #if defined(__linux__) || defined(GL_ES_VERSION_2_0) -// On linux we dont link to GLU library to avoid comaptibility issues with +// On linux we don't link to GLU library to avoid compatibility issues with // libstdc++ // FIXME: Implement this const GLubyte* gluErrorString (GLenum error) @@ -271,8 +271,6 @@ void * ReadGLTexture( GLenum glTarget, GLuint glTexture, // Read results from the GL texture glBindTexture(get_base_gl_target(glTarget), glTexture); - GLint realWidth, realHeight; - GLint realInternalFormat; GLenum readBackFormat = GL_RGBA; GLenum readBackType = glType; glFramebufferWrapper glFramebuffer; @@ -301,7 +299,7 @@ void * ReadGLTexture( GLenum glTarget, GLuint glTexture, GetGLFormatName(readBackFormat), GetGLTypeName(readBackType)); - DumpGLBuffer(readBackType, realWidth, realHeight, (void*)outBuffer); + DumpGLBuffer(readBackType, outWidth, outHeight, (void *)outBuffer); #endif diff --git a/test_common/gles/helpers.h b/test_common/gles/helpers.h index 5bd0fdf1..20768787 100644 --- a/test_common/gles/helpers.h +++ b/test_common/gles/helpers.h @@ -30,11 +30,10 @@ #if !defined (__APPLE__) #include -#include "gl_headers.h" #include -#else -#include "gl_headers.h" +#include #endif +#include "gl_headers.h" #include "harness/errorHelpers.h" #include "harness/kernelHelpers.h" diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp index 31985aa0..5dae1b4a 100644 --- a/test_common/harness/ThreadPool.cpp +++ b/test_common/harness/ThreadPool.cpp @@ -523,7 +523,7 @@ void ThreadPool_Init(void) { // Count the number of bits in ProcessorMask 
(number of // logical cores) - ULONG mask = ptr->ProcessorMask; + ULONG_PTR mask = ptr->ProcessorMask; while (mask) { ++gThreadCount; @@ -688,7 +688,10 @@ static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, void ThreadPool_Exit(void) { - int err, count; +#ifndef _WIN32 + int err; +#endif + int count; gRunCount = CL_INT_MAX; #if defined(__GNUC__) @@ -738,7 +741,9 @@ void ThreadPool_Exit(void) // all available then it would make more sense to use those features. cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo) { +#ifndef _WIN32 cl_int newErr; +#endif cl_int err = 0; // Lazily set up our threads #if defined(_MSC_VER) && (_WIN32_WINNT >= 0x600) @@ -913,7 +918,9 @@ cl_int ThreadPool_Do(TPFuncPtr func_ptr, cl_uint count, void *userInfo) err = jobError; +#ifndef _WIN32 exit: +#endif // exit critical region #if defined(_WIN32) LeaveCriticalSection(gThreadPoolLock); diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h index 7aad15a0..3b557852 100644 --- a/test_common/harness/compat.h +++ b/test_common/harness/compat.h @@ -18,13 +18,13 @@ #if defined(_WIN32) && defined(_MSC_VER) #include -#endif - +#else #ifdef __cplusplus #define EXTERN_C extern "C" #else #define EXTERN_C #endif +#endif // diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp index fc3317c7..c7731269 100644 --- a/test_common/harness/conversions.cpp +++ b/test_common/harness/conversions.cpp @@ -181,8 +181,8 @@ static ULong sUpperLimits[kNumExplicitTypes] = { 0xffffffffLL, 0xffffffffLL, 0x7fffffffffffffffLL, - 0xffffffffffffffffLL, - 0xffffffffffffffffLL, + 0xffffffffffffffffULL, + 0xffffffffffffffffULL, 0, 0 }; // Last two values aren't stored here diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp index 22a2677d..3ddbc37b 100644 --- a/test_common/harness/errorHelpers.cpp +++ b/test_common/harness/errorHelpers.cpp @@ -564,7 +564,7 @@ cl_int OutputBuildLogs(cl_program 
program, cl_uint num_devices, error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size_ret); test_error(error, "Unable to query context's device size"); - num_devices = size_ret / sizeof(cl_device_id); + num_devices = static_cast(size_ret / sizeof(cl_device_id)); device_list = (cl_device_id *)malloc(size_ret); if (device_list == NULL) { @@ -695,7 +695,7 @@ int check_functions_for_offline_compiler(const char *subtestname, { if (gCompilationMode != kOnline) { - int nNotRequiredWithOfflineCompiler = + size_t nNotRequiredWithOfflineCompiler = sizeof(subtests_to_skip_with_offline_compiler) / sizeof(char *); size_t i; for (i = 0; i < nNotRequiredWithOfflineCompiler; ++i) @@ -707,4 +707,4 @@ int check_functions_for_offline_compiler(const char *subtestname, } } return 0; -} +} \ No newline at end of file diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h index 19446014..c7f49e3d 100644 --- a/test_common/harness/errorHelpers.h +++ b/test_common/harness/errorHelpers.h @@ -56,11 +56,6 @@ static int vlog_win32(const char *format, ...); #define vlog printf #endif -#define ct_assert(b) ct_assert_i(b, __LINE__) -#define ct_assert_i(b, line) ct_assert_ii(b, line) -#define ct_assert_ii(b, line) \ - int _compile_time_assertion_on_line_##line[b ? 1 : -1]; - #define test_fail(msg, ...) \ { \ log_error(msg, ##__VA_ARGS__); \ diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index 9f065044..2add9baf 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -30,7 +30,11 @@ // that rounding mode. 
#if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__) \ || defined(__MINGW32__) +#ifdef _MSC_VER typedef int FPU_mode_type; +#else +typedef int64_t FPU_mode_type; +#endif #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) \ || defined(__MINGW32__) #include @@ -55,7 +59,7 @@ inline void ForceFTZ(FPU_mode_type *mode) __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24))); // Add 64 bit support #elif defined(__aarch64__) - unsigned fpscr; + uint64_t fpscr; __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); *mode = fpscr; __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24))); @@ -81,7 +85,7 @@ inline void DisableFTZ(FPU_mode_type *mode) __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24))); // Add 64 bit support #elif defined(__aarch64__) - unsigned fpscr; + uint64_t fpscr; __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr)); *mode = fpscr; __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24))); diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index 72a2f0c0..d1754653 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -554,8 +554,8 @@ struct AddressingTable { AddressingTable() { - ct_assert((CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6)); - ct_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2); + static_assert(CL_ADDRESS_MIRRORED_REPEAT - CL_ADDRESS_NONE < 6, ""); + static_assert(CL_FILTER_NEAREST - CL_FILTER_LINEAR < 2, ""); mTable[CL_ADDRESS_NONE - CL_ADDRESS_NONE] [CL_FILTER_NEAREST - CL_FILTER_NEAREST] = NoAddressFn; @@ -719,7 +719,7 @@ void get_max_sizes( if (usingMaxPixelSizeBuffer || raw_pixel_size == 12) raw_pixel_size = 16; size_t max_pixels = (size_t)maxAllocSize / raw_pixel_size; - log_info("Maximums: [%ld x %ld x %ld], raw pixel size %lu bytes, " + log_info("Maximums: [%zu x %zu x %zu], raw pixel size %zu bytes, " "per-allocation limit %gMB.\n", maxWidth, maxHeight, isArray ? 
maxArraySize : maxDepth, raw_pixel_size, (maxAllocSize / (1024.0 * 1024.0))); @@ -760,10 +760,10 @@ void get_max_sizes( if (image_type == CL_MEM_OBJECT_IMAGE1D) { - double M = maximum_sizes[0]; + size_t M = maximum_sizes[0]; // Store the size - sizes[(*numberOfSizes)][0] = (size_t)M; + sizes[(*numberOfSizes)][0] = M; sizes[(*numberOfSizes)][1] = 1; sizes[(*numberOfSizes)][2] = 1; ++(*numberOfSizes); @@ -777,17 +777,17 @@ void get_max_sizes( { // Determine the size of the fixed dimension - double M = maximum_sizes[fixed_dim]; - double A = max_pixels; + size_t M = maximum_sizes[fixed_dim]; + size_t A = max_pixels; int x0_dim = !fixed_dim; - double x0 = + size_t x0 = static_cast( fmin(fmin(other_sizes[(other_size++) % num_other_sizes], A / M), - maximum_sizes[x0_dim]); + maximum_sizes[x0_dim])); // Store the size - sizes[(*numberOfSizes)][fixed_dim] = (size_t)M; - sizes[(*numberOfSizes)][x0_dim] = (size_t)x0; + sizes[(*numberOfSizes)][fixed_dim] = M; + sizes[(*numberOfSizes)][x0_dim] = x0; sizes[(*numberOfSizes)][2] = 1; ++(*numberOfSizes); } @@ -802,16 +802,17 @@ void get_max_sizes( { // Determine the size of the fixed dimension - double M = maximum_sizes[fixed_dim]; - double A = max_pixels; + size_t M = maximum_sizes[fixed_dim]; + size_t A = max_pixels; // Find two other dimensions, x0 and x1 int x0_dim = (fixed_dim == 0) ? 1 : 0; int x1_dim = (fixed_dim == 2) ? 1 : 2; // Choose two other sizes for these dimensions - double x0 = fmin(fmin(A / M, maximum_sizes[x0_dim]), - other_sizes[(other_size++) % num_other_sizes]); + size_t x0 = static_cast( + fmin(fmin(A / M, maximum_sizes[x0_dim]), + other_sizes[(other_size++) % num_other_sizes])); // GPUs have certain restrictions on minimum width (row alignment) // of images which has given us issues testing small widths in this // test (say we set width to 3 for testing, and compute size based @@ -820,8 +821,9 @@ void get_max_sizes( // width of 16 which doesnt fit in vram). 
For this purpose we are // not testing width < 16 for this test. if (x0_dim == 0 && x0 < 16) x0 = 16; - double x1 = fmin(fmin(A / M / x0, maximum_sizes[x1_dim]), - other_sizes[(other_size++) % num_other_sizes]); + size_t x1 = static_cast( + fmin(fmin(A / M / x0, maximum_sizes[x1_dim]), + other_sizes[(other_size++) % num_other_sizes])); // Valid image sizes cannot be below 1. Due to the workaround for // the xo_dim where x0 is overidden to 16 there might not be enough @@ -834,9 +836,9 @@ void get_max_sizes( assert(x0 > 0 && M > 0); // Store the size - sizes[(*numberOfSizes)][fixed_dim] = (size_t)M; - sizes[(*numberOfSizes)][x0_dim] = (size_t)x0; - sizes[(*numberOfSizes)][x1_dim] = (size_t)x1; + sizes[(*numberOfSizes)][fixed_dim] = M; + sizes[(*numberOfSizes)][x0_dim] = x0; + sizes[(*numberOfSizes)][x1_dim] = x1; ++(*numberOfSizes); } } @@ -847,20 +849,20 @@ void get_max_sizes( switch (image_type) { case CL_MEM_OBJECT_IMAGE1D: - log_info(" size[%d] = [%ld] (%g MB image)\n", j, sizes[j][0], + log_info(" size[%d] = [%zu] (%g MB image)\n", j, sizes[j][0], raw_pixel_size * sizes[j][0] * sizes[j][1] * sizes[j][2] / (1024.0 * 1024.0)); break; case CL_MEM_OBJECT_IMAGE1D_ARRAY: case CL_MEM_OBJECT_IMAGE2D: - log_info(" size[%d] = [%ld %ld] (%g MB image)\n", j, + log_info(" size[%d] = [%zu %zu] (%g MB image)\n", j, sizes[j][0], sizes[j][1], raw_pixel_size * sizes[j][0] * sizes[j][1] * sizes[j][2] / (1024.0 * 1024.0)); break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: case CL_MEM_OBJECT_IMAGE3D: - log_info(" size[%d] = [%ld %ld %ld] (%g MB image)\n", j, + log_info(" size[%d] = [%zu %zu %zu] (%g MB image)\n", j, sizes[j][0], sizes[j][1], sizes[j][2], raw_pixel_size * sizes[j][0] * sizes[j][1] * sizes[j][2] / (1024.0 * 1024.0)); @@ -1124,12 +1126,13 @@ void escape_inf_nan_values(char *data, size_t allocSize) char *generate_random_image_data(image_descriptor *imageInfo, BufferOwningPtr &P, MTdata d) { - size_t allocSize = get_image_size(imageInfo); + size_t allocSize = 
static_cast(get_image_size(imageInfo)); size_t pixelRowBytes = imageInfo->width * get_pixel_size(imageInfo->format); size_t i; if (imageInfo->num_mip_levels > 1) - allocSize = compute_mipmapped_image_size(*imageInfo); + allocSize = + static_cast(compute_mipmapped_image_size(*imageInfo)); #if defined(__APPLE__) char *data = NULL; @@ -1161,7 +1164,7 @@ char *generate_random_image_data(image_descriptor *imageInfo, if (data == NULL) { - log_error("ERROR: Unable to malloc %lu bytes for " + log_error("ERROR: Unable to malloc %zu bytes for " "generate_random_image_data\n", allocSize); return 0; @@ -1678,24 +1681,26 @@ bool get_integer_coords_offset(float x, float y, float z, float xAddressOffset, // At this point, we're dealing with non-normalized coordinates. - outX = adFn(floorf(x), width); + outX = adFn(static_cast(floorf(x)), width); // 1D and 2D arrays require special care for the index coordinate: switch (imageInfo->type) { case CL_MEM_OBJECT_IMAGE1D_ARRAY: - outY = calculate_array_index(y, (float)imageInfo->arraySize - 1.0f); - outZ = 0.0f; /* don't care! */ + outY = static_cast( + calculate_array_index(y, (float)imageInfo->arraySize - 1.0f)); + outZ = 0; /* don't care! 
*/ break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: - outY = adFn(floorf(y), height); - outZ = calculate_array_index(z, (float)imageInfo->arraySize - 1.0f); + outY = adFn(static_cast(floorf(y)), height); + outZ = static_cast( + calculate_array_index(z, (float)imageInfo->arraySize - 1.0f)); break; default: // legacy path: - if (height != 0) outY = adFn(floorf(y), height); - if (depth != 0) outZ = adFn(floorf(z), depth); + if (height != 0) outY = adFn(static_cast(floorf(y)), height); + if (depth != 0) outZ = adFn(static_cast(floorf(z)), depth); } return !((int)refX == outX && (int)refY == outY && (int)refZ == outZ); @@ -1766,7 +1771,7 @@ static float unnormalize_coordinate(const char *name, float coord, float offset, switch (addressing_mode) { case CL_ADDRESS_REPEAT: - ret = RepeatNormalizedAddressFn(coord, extent); + ret = RepeatNormalizedAddressFn(coord, static_cast(extent)); if (verbose) { @@ -1790,7 +1795,8 @@ static float unnormalize_coordinate(const char *name, float coord, float offset, break; case CL_ADDRESS_MIRRORED_REPEAT: - ret = MirroredRepeatNormalizedAddressFn(coord, extent); + ret = MirroredRepeatNormalizedAddressFn( + coord, static_cast(extent)); if (verbose) { @@ -1968,13 +1974,13 @@ FloatPixel sample_image_pixel_float_offset( // coordinates. Note that the array cases again require special // care, per section 8.4 in the OpenCL 1.2 Specification. 
- ix = adFn(floorf(x), width_lod); + ix = adFn(static_cast(floorf(x)), width_lod); switch (imageInfo->type) { case CL_MEM_OBJECT_IMAGE1D_ARRAY: - iy = - calculate_array_index(y, (float)(imageInfo->arraySize - 1)); + iy = static_cast(calculate_array_index( + y, (float)(imageInfo->arraySize - 1))); iz = 0; if (verbose) { @@ -1982,18 +1988,18 @@ FloatPixel sample_image_pixel_float_offset( } break; case CL_MEM_OBJECT_IMAGE2D_ARRAY: - iy = adFn(floorf(y), height_lod); - iz = - calculate_array_index(z, (float)(imageInfo->arraySize - 1)); + iy = adFn(static_cast(floorf(y)), height_lod); + iz = static_cast(calculate_array_index( + z, (float)(imageInfo->arraySize - 1))); if (verbose) { log_info("\tArray index %f evaluates to %d\n", z, iz); } break; default: - iy = adFn(floorf(y), height_lod); + iy = adFn(static_cast(floorf(y)), height_lod); if (depth_lod != 0) - iz = adFn(floorf(z), depth_lod); + iz = adFn(static_cast(floorf(z)), depth_lod); else iz = 0; } @@ -2047,16 +2053,16 @@ FloatPixel sample_image_pixel_float_offset( height = 1; } - int x1 = adFn(floorf(x - 0.5f), width); + int x1 = adFn(static_cast(floorf(x - 0.5f)), width); int y1 = 0; - int x2 = adFn(floorf(x - 0.5f) + 1, width); + int x2 = adFn(static_cast(floorf(x - 0.5f) + 1), width); int y2 = 0; if ((imageInfo->type != CL_MEM_OBJECT_IMAGE1D) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) && (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_BUFFER)) { - y1 = adFn(floorf(y - 0.5f), height); - y2 = adFn(floorf(y - 0.5f) + 1, height); + y1 = adFn(static_cast(floorf(y - 0.5f)), height); + y2 = adFn(static_cast(floorf(y - 0.5f) + 1), height); } else { @@ -2147,12 +2153,12 @@ FloatPixel sample_image_pixel_float_offset( else { // 3D linear filtering - int x1 = adFn(floorf(x - 0.5f), width_lod); - int y1 = adFn(floorf(y - 0.5f), height_lod); - int z1 = adFn(floorf(z - 0.5f), depth_lod); - int x2 = adFn(floorf(x - 0.5f) + 1, width_lod); - int y2 = adFn(floorf(y - 0.5f) + 1, height_lod); - int z2 = adFn(floorf(z - 0.5f) + 1, 
depth_lod); + int x1 = adFn(static_cast(floorf(x - 0.5f)), width_lod); + int y1 = adFn(static_cast(floorf(y - 0.5f)), height_lod); + int z1 = adFn(static_cast(floorf(z - 0.5f)), depth_lod); + int x2 = adFn(static_cast(floorf(x - 0.5f) + 1), width_lod); + int y2 = adFn(static_cast(floorf(y - 0.5f) + 1), height_lod); + int z2 = adFn(static_cast(floorf(z - 0.5f) + 1), depth_lod); if (verbose) log_info("\tActual integer coords used (i = floor(x-.5)): " @@ -2899,15 +2905,18 @@ void pack_image_pixel_error(const float *srcVector, case CL_UNSIGNED_INT8: { const cl_uchar *ptr = (const cl_uchar *)results; for (unsigned int i = 0; i < channelCount; i++) - errors[i] = (cl_int)ptr[i] - - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX); + errors[i] = static_cast( + (cl_int)ptr[i] + - (cl_int)CONVERT_UINT(srcVector[i], 255.f, CL_UCHAR_MAX)); break; } case CL_UNSIGNED_INT16: { const cl_ushort *ptr = (const cl_ushort *)results; for (unsigned int i = 0; i < channelCount; i++) - errors[i] = (cl_int)ptr[i] - - (cl_int)CONVERT_UINT(srcVector[i], 32767.f, CL_USHRT_MAX); + errors[i] = static_cast( + (cl_int)ptr[i] + - (cl_int)CONVERT_UINT(srcVector[i], 32767.f, + CL_USHRT_MAX)); break; } case CL_UNSIGNED_INT32: { @@ -3228,7 +3237,7 @@ char *create_random_image_data(ExplicitType dataType, if (data == NULL) { log_error( - "ERROR: Unable to malloc %lu bytes for create_random_image_data\n", + "ERROR: Unable to malloc %zu bytes for create_random_image_data\n", allocSize); return NULL; } @@ -3988,7 +3997,8 @@ bool is_image_format_required(cl_image_format format, cl_mem_flags flags, cl_uint compute_max_mip_levels(size_t width, size_t height, size_t depth) { - cl_uint retMaxMipLevels = 0, max_dim = 0; + cl_uint retMaxMipLevels = 0; + size_t max_dim = 0; max_dim = width; max_dim = height > max_dim ? 
height : max_dim; diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp index 95b9555e..aaf0d689 100644 --- a/test_common/harness/kernelHelpers.cpp +++ b/test_common/harness/kernelHelpers.cpp @@ -579,7 +579,7 @@ static int create_single_kernel_helper_create_program_offline( if (error != CL_SUCCESS) return error; ifs.seekg(0, ifs.end); - int length = ifs.tellg(); + size_t length = static_cast(ifs.tellg()); ifs.seekg(0, ifs.beg); // treat modifiedProgram as input for clCreateProgramWithBinary @@ -1226,7 +1226,7 @@ int is_image_format_supported(cl_context context, cl_mem_flags flags, list = (cl_image_format *)malloc(count * sizeof(cl_image_format)); if (NULL == list) { - log_error("Error: unable to allocate %ld byte buffer for image format " + log_error("Error: unable to allocate %zu byte buffer for image format " "list at %s:%d (err = %d)\n", count * sizeof(cl_image_format), __FILE__, __LINE__, err); return 0; diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp index cd350cf8..daf21958 100644 --- a/test_common/harness/os_helpers.cpp +++ b/test_common/harness/os_helpers.cpp @@ -404,7 +404,8 @@ std::string exe_path() for (;;) { - DWORD len = GetModuleFileNameA(NULL, &path.front(), path.size()); + DWORD len = GetModuleFileNameA(NULL, &path.front(), + static_cast(path.size())); if (len == 0) { diff --git a/test_common/harness/propertyHelpers.cpp b/test_common/harness/propertyHelpers.cpp index 3157ca80..e368f9b6 100644 --- a/test_common/harness/propertyHelpers.cpp +++ b/test_common/harness/propertyHelpers.cpp @@ -97,15 +97,16 @@ int compareProperties(const std::vector& queried, if (!found) { - log_error("ERROR: expected property 0x%x not found!\n", + log_error("ERROR: expected property 0x%llx not found!\n", check_prop); return TEST_FAIL; } else if (check_value != queried_value) { - log_error("ERROR: mis-matched value for property 0x%x: wanted " - "0x%x, got 0x%x\n", - check_prop, check_value, 
queried_value); + log_error( + "ERROR: mis-matched value for property 0x%llx: wanted " + "0x%llx, got 0x%llx\n", + check_prop, check_value, queried_value); return TEST_FAIL; } } @@ -113,7 +114,7 @@ int compareProperties(const std::vector& queried, if (queried.size() > check.size()) { log_error("ERROR: all properties found but there are extra " - "properties: expected %d, got %d.\n", + "properties: expected %zu, got %zu.\n", check.size(), queried.size()); return TEST_FAIL; } diff --git a/test_common/harness/rounding_mode.cpp b/test_common/harness/rounding_mode.cpp index 681ccdd8..1f531478 100644 --- a/test_common/harness/rounding_mode.cpp +++ b/test_common/harness/rounding_mode.cpp @@ -48,7 +48,7 @@ RoundingMode set_round(RoundingMode r, Type outType) const int *p = int_rounds; if (outType == kfloat || outType == kdouble) p = flt_rounds; - int fpscr = 0; + int64_t fpscr = 0; RoundingMode oldRound = get_round(); _FPU_GETCW(fpscr); @@ -59,7 +59,7 @@ RoundingMode set_round(RoundingMode r, Type outType) RoundingMode get_round(void) { - int fpscr; + int64_t fpscr; int oldRound; _FPU_GETCW(fpscr); @@ -203,13 +203,13 @@ void *FlushToZero(void) #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER) union { - int i; + unsigned int i; void *p; } u = { _mm_getcsr() }; _mm_setcsr(u.i | 0x8040); return u.p; #elif defined(__arm__) || defined(__aarch64__) - int fpscr; + int64_t fpscr; _FPU_GETCW(fpscr); _FPU_SETCW(fpscr | FPSCR_FZ); return NULL; @@ -239,7 +239,7 @@ void UnFlushToZero(void *p) } u = { p }; _mm_setcsr(u.i); #elif defined(__arm__) || defined(__aarch64__) - int fpscr; + int64_t fpscr; _FPU_GETCW(fpscr); _FPU_SETCW(fpscr & ~FPSCR_FZ); #elif defined(__PPC__) diff --git a/test_common/harness/rounding_mode.h b/test_common/harness/rounding_mode.h index 064a3a63..6f52f0a0 100644 --- a/test_common/harness/rounding_mode.h +++ b/test_common/harness/rounding_mode.h @@ -16,8 +16,6 @@ #ifndef 
__ROUNDING_MODE_H__ #define __ROUNDING_MODE_H__ -#pragma STDC FENV_ACCESS ON - #include "compat.h" #if (defined(_WIN32) && defined(_MSC_VER)) diff --git a/test_common/harness/threadTesting.cpp b/test_common/harness/threadTesting.cpp index 875ee59b..e69de29b 100644 --- a/test_common/harness/threadTesting.cpp +++ b/test_common/harness/threadTesting.cpp @@ -1,98 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "compat.h" -#include "threadTesting.h" -#include "errorHelpers.h" -#include -#include - -#if !defined(_WIN32) -#include -#endif - -#if 0 // Disabed for now - -typedef struct -{ - basefn mFunction; - cl_device_id mDevice; - cl_context mContext; - int mNumElements; -} TestFnArgs; - -//////////////////////////////////////////////////////////////////////////////// -// Thread-based testing. Spawns a new thread to run the given test function, -// then waits for it to complete. 
The entire idea is that, if the thread crashes, -// we can catch it and report it as a failure instead of crashing the entire suite -//////////////////////////////////////////////////////////////////////////////// - -void *test_thread_wrapper( void *data ) -{ - TestFnArgs *args; - int retVal; - cl_context context; - - args = (TestFnArgs *)data; - - /* Create a new context to use (contexts can't cross threads) */ - context = clCreateContext(NULL, args->mDeviceGroup); - if( context == NULL ) - { - log_error("clCreateContext failed for new thread\n"); - return (void *)(-1); - } - - /* Call function */ - retVal = args->mFunction( args->mDeviceGroup, args->mDevice, context, args->mNumElements ); - - clReleaseContext( context ); - - return (void *)retVal; -} - -int test_threaded_function( basefn fnToTest, cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) -{ - int error; - pthread_t threadHdl; - void *retVal; - TestFnArgs args; - - - args.mFunction = fnToTest; - args.mDeviceGroup = deviceGroup; - args.mDevice = device; - args.mContext = context; - args.mNumElements = numElements; - - - error = pthread_create( &threadHdl, NULL, test_thread_wrapper, (void *)&args ); - if( error != 0 ) - { - log_error( "ERROR: Unable to create thread for testing!\n" ); - return -1; - } - - /* Thread has been started, now just wait for it to complete (or crash) */ - error = pthread_join( threadHdl, &retVal ); - if( error != 0 ) - { - log_error( "ERROR: Unable to join testing thread!\n" ); - return -1; - } - - return (int)((intptr_t)retVal); -} -#endif diff --git a/test_common/harness/threadTesting.h b/test_common/harness/threadTesting.h index 765eabcc..2f3c1873 100644 --- a/test_common/harness/threadTesting.h +++ b/test_common/harness/threadTesting.h @@ -24,8 +24,5 @@ typedef int (*basefn)(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_threaded_function(basefn fnToTest, cl_device_id device, - cl_context 
context, cl_command_queue queue, - int numElements); -#endif // _threadTesting_h +#endif // _threadTesting_h \ No newline at end of file diff --git a/test_conformance/gles/CMakeLists.txt b/test_conformance/gles/CMakeLists.txt index c76fe512..4f4ba532 100644 --- a/test_conformance/gles/CMakeLists.txt +++ b/test_conformance/gles/CMakeLists.txt @@ -18,3 +18,11 @@ set (${MODULE_NAME}_SOURCES list(APPEND CLConform_LIBRARIES EGL GLESv2) include(../CMakeCommon.txt) + +if(DEFINED USE_GLES3) + target_compile_definitions(${${MODULE_NAME}_OUT} PRIVATE GLES3) +endif() +if(MSVC) + # Don't warn about using the portable "strdup" function. + target_compile_definitions(${${MODULE_NAME}_OUT} PRIVATE _CRT_NONSTDC_NO_DEPRECATE) +endif() \ No newline at end of file diff --git a/test_conformance/gles/setup_egl.cpp b/test_conformance/gles/setup_egl.cpp index fe0f8ca3..95a12a66 100644 --- a/test_conformance/gles/setup_egl.cpp +++ b/test_conformance/gles/setup_egl.cpp @@ -117,7 +117,8 @@ public: _platform, "clGetGLContextInfoKHR"); if (GetGLContextInfo == NULL) { - print_error(status, "clGetGLContextInfoKHR failed"); + log_error("ERROR: clGetGLContextInfoKHR failed! 
(%s:%d)\n", + __FILE__, __LINE__); return NULL; } @@ -128,7 +129,7 @@ public: return NULL; } dev_size /= sizeof(cl_device_id); - log_info("GL _context supports %d compute devices\n", dev_size); + log_info("GL _context supports %zu compute devices\n", dev_size); status = GetGLContextInfo(properties, CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR, diff --git a/test_conformance/gles/test_fence_sync.cpp b/test_conformance/gles/test_fence_sync.cpp index 0af91a46..968d9695 100644 --- a/test_conformance/gles/test_fence_sync.cpp +++ b/test_conformance/gles/test_fence_sync.cpp @@ -570,10 +570,12 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ { if( p[ t ] == 0 ) { - log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1, - (int)( a % framebufferSize ), (int)( a / framebufferSize ), - (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ), - p[ a ] ); + log_error( + "RUN: %zu to %zu (%d,%d to %d,%d) 0x%08x\n", + a, t - 1, (int)(a % framebufferSize), + (int)(a / framebufferSize), + (int)((t - 1) % framebufferSize), + (int)((t - 1) / framebufferSize), p[a]); a = t; } } diff --git a/test_conformance/gles/test_images_2D.cpp b/test_conformance/gles/test_images_2D.cpp index c1a17fc8..f6554023 100644 --- a/test_conformance/gles/test_images_2D.cpp +++ b/test_conformance/gles/test_images_2D.cpp @@ -369,7 +369,9 @@ int test_images_read_cube( cl_device_id device, cl_context context, cl_command_q } +#ifdef __APPLE__ #pragma mark -------------------- Write tests ------------------------- +#endif int test_cl_image_write( cl_context context, cl_command_queue queue, cl_mem clImage, diff --git a/test_conformance/gles/test_renderbuffer.cpp b/test_conformance/gles/test_renderbuffer.cpp index 20127aca..0f6d289b 100644 --- a/test_conformance/gles/test_renderbuffer.cpp +++ b/test_conformance/gles/test_renderbuffer.cpp @@ -197,7 +197,9 @@ int test_renderbuffer_read( cl_device_id device, cl_context context, cl_command_ } +#ifdef __APPLE__ 
#pragma mark -------------------- Write tests ------------------------- +#endif int test_attach_renderbuffer_write_to_image( cl_context context, cl_command_queue queue, GLenum glTarget, GLuint glRenderbuffer, size_t imageWidth, size_t imageHeight, cl_image_format *outFormat, ExplicitType *outType, MTdata d, void **outSourceBuffer ) -- cgit v1.2.3 From de49d59c8dfad1171d7dd7c0df929ae3a68aea1a Mon Sep 17 00:00:00 2001 From: Pierre Moreau Date: Tue, 18 May 2021 19:12:55 +0200 Subject: Allocations fixes (#1245) * allocations: Run buffer non-blocking even without images Testing buffer non-blocking should not be dependent on whether images are supported by a device or not. * allocations: Fix typos --- test_conformance/allocations/allocation_fill.cpp | 6 ++++-- test_conformance/allocations/allocation_functions.cpp | 4 ++-- test_conformance/allocations/main.cpp | 6 ++++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/test_conformance/allocations/allocation_fill.cpp b/test_conformance/allocations/allocation_fill.cpp index a7558942..b4ea3798 100644 --- a/test_conformance/allocations/allocation_fill.cpp +++ b/test_conformance/allocations/allocation_fill.cpp @@ -200,8 +200,10 @@ int fill_image_with_data(cl_context context, cl_device_id device_id, cl_command_ result = clFinish(*queue); if (result != SUCCEEDED) { - print_error(error, "clFinish failed after successful enquing filling buffer with data."); - return result; + print_error(error, + "clFinish failed after successful enqueuing filling " + "buffer with data."); + return result; } } else { error = clEnqueueWriteImage(*queue, mem, CL_FALSE, origin, region, 0, 0, data, 0, NULL, &event); diff --git a/test_conformance/allocations/allocation_functions.cpp b/test_conformance/allocations/allocation_functions.cpp index 7182c727..827ee104 100644 --- a/test_conformance/allocations/allocation_functions.cpp +++ b/test_conformance/allocations/allocation_functions.cpp @@ -37,8 +37,8 @@ int 
find_good_image_size(cl_device_id device_id, size_t size_to_allocate, size_t } if (size_to_allocate == 0) { - log_error("Trying to allcoate a zero sized image.\n"); - return FAILED_ABORT; + log_error("Trying to allocate a zero sized image.\n"); + return FAILED_ABORT; } error = clGetDeviceInfo( device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( max_width ), &max_width, NULL ); diff --git a/test_conformance/allocations/main.cpp b/test_conformance/allocations/main.cpp index 0dec4c6d..43e81277 100644 --- a/test_conformance/allocations/main.cpp +++ b/test_conformance/allocations/main.cpp @@ -112,6 +112,8 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All int number_of_mems_used; cl_ulong max_individual_allocation_size = g_max_individual_allocation_size; cl_ulong global_mem_size = g_global_mem_size ; + const bool allocate_image = + (alloc_type != BUFFER) && (alloc_type != BUFFER_NON_BLOCKING); static const char* alloc_description[] = { "buffer(s)", @@ -123,7 +125,7 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All }; // Skip image tests if we don't support images on the device - if( alloc_type > BUFFER && checkForImageSupport( device ) ) + if (allocate_image && checkForImageSupport(device)) { log_info( "Can not test image allocation because device does not support images.\n" ); return 0; @@ -132,7 +134,7 @@ int doTest( cl_device_id device, cl_context context, cl_command_queue queue, All // This section was added in order to fix a bug in the test // If CL_DEVICE_MAX_MEM_ALLOC_SIZE is much grater than CL_DEVICE_IMAGE2D_MAX_WIDTH * CL_DEVICE_IMAGE2D_MAX_HEIGHT // The test will fail in image allocations as the size requested for the allocation will be much grater than the maximum size allowed for image - if( ( alloc_type != BUFFER ) && ( alloc_type != BUFFER_NON_BLOCKING ) ) + if (allocate_image) { size_t max_width, max_height; -- cgit v1.2.3 From 01aa55029d49a7c788e9edd97ff686816ff84267 Mon Sep 17 00:00:00 2001 
From: Marco Antognini Date: Fri, 21 May 2021 10:06:13 +0100 Subject: Update warning options (#1252) Remove workaround for #783, this was fixed by #1237. Remove workaround for overflow, #699 has been merged. Disable errors from -Wimplicit-const-int-float-conversion, the issue is covered by #1250. Signed-off-by: Marco Antognini --- CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b1f48fd..8d947ed1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,10 +106,9 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang" add_cxx_flag_if_supported(-Wno-format) add_cxx_flag_if_supported(-Werror) add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive - add_cxx_flag_if_supported(-Wno-error=absolute-value) # Issue 783 add_cxx_flag_if_supported(-Wno-error=unknown-pragmas) # Issue #785 add_cxx_flag_if_supported(-Wno-error=asm-operand-widths) # Issue #784 - add_cxx_flag_if_supported(-Wno-error=overflow) # Fixed by #699 + add_cxx_flag_if_supported(-Wno-error=implicit-const-int-float-conversion) # Issue #1250 # -msse -mfpmath=sse to force gcc to use sse for float math, # avoiding excess precision problems that cause tests like int2float -- cgit v1.2.3 From ce1687a408686d38e2629a4426ef7c38e10f0e23 Mon Sep 17 00:00:00 2001 From: James Price Date: Fri, 21 May 2021 05:07:12 -0400 Subject: Add missing cstdint include (#1259) --- test_common/harness/fpcontrol.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_common/harness/fpcontrol.h b/test_common/harness/fpcontrol.h index 2add9baf..222aa2c4 100644 --- a/test_common/harness/fpcontrol.h +++ b/test_common/harness/fpcontrol.h @@ -16,6 +16,8 @@ #ifndef _fpcontrol_h #define _fpcontrol_h +#include + // In order to get tests for correctly rounded operations (e.g. multiply) to // work properly we need to be able to set the reference hardware to FTZ mode if // the device hardware is running in that mode. 
We have explored all other -- cgit v1.2.3 From ba9312e4a2e2431a716150a3df3491834076d046 Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Fri, 21 May 2021 10:07:54 +0100 Subject: Fix ODR violations in math_brute_force (#1255) A program having a type (such as ThreadInfo) defined differently in multiple translation units exhibits undefined behaviour. This commit fixes such issues in the math_brute_force component by ensuring most types are local to their translation unit with the help of anonymous namespaces. Later refactoring will be able to extract common definitions to a single place. This patch also removes unnecessary static and typedef keywords. Otherwise, code is only moved around with no change. Signed-off-by: Marco Antognini --- .../math_brute_force/binary_double.cpp | 396 ++++++++++---------- test_conformance/math_brute_force/binary_float.cpp | 398 ++++++++++---------- .../math_brute_force/binary_i_double.cpp | 399 ++++++++++---------- .../math_brute_force/binary_i_float.cpp | 401 ++++++++++---------- .../math_brute_force/binary_operator_double.cpp | 392 ++++++++++---------- .../math_brute_force/binary_operator_float.cpp | 396 ++++++++++---------- .../binary_two_results_i_double.cpp | 20 +- .../binary_two_results_i_float.cpp | 20 +- .../math_brute_force/function_list.cpp | 1 + .../math_brute_force/i_unary_double.cpp | 14 +- .../math_brute_force/i_unary_float.cpp | 14 +- .../math_brute_force/macro_binary_double.cpp | 355 +++++++++--------- .../math_brute_force/macro_binary_float.cpp | 357 +++++++++--------- .../math_brute_force/macro_unary_double.cpp | 323 ++++++++-------- .../math_brute_force/macro_unary_float.cpp | 325 ++++++++--------- test_conformance/math_brute_force/mad_double.cpp | 14 +- test_conformance/math_brute_force/mad_float.cpp | 14 +- .../math_brute_force/ternary_double.cpp | 18 +- .../math_brute_force/ternary_float.cpp | 18 +- test_conformance/math_brute_force/unary_double.cpp | 352 +++++++++--------- 
test_conformance/math_brute_force/unary_float.cpp | 404 +++++++++++---------- .../math_brute_force/unary_two_results_double.cpp | 14 +- .../math_brute_force/unary_two_results_float.cpp | 14 +- .../unary_two_results_i_double.cpp | 16 +- .../math_brute_force/unary_two_results_i_float.cpp | 16 +- .../math_brute_force/unary_u_double.cpp | 16 +- .../math_brute_force/unary_u_float.cpp | 14 +- 27 files changed, 2400 insertions(+), 2321 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 4baa4991..9c6b59b4 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -20,10 +20,12 @@ #include +namespace { + const double twoToMinus1022 = MAKE_HEX_DOUBLE(0x1p-1022, 1, -1022); -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -109,7 +111,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -117,9 +119,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -128,7 +130,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -140,9 +142,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -164,10 +166,10 @@ typedef struct TestInfo int isNextafter; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. 
-} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -277,195 +279,10 @@ static const double specialValues[] = { +0.0, }; -static size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); - test_info.skipNanInf = 0; - test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = 
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -825,3 +642,188 @@ static cl_int 
Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); + test_info.skipNanInf = 0; + test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for 
(cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 32caafa3..9c7081dc 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ 
-20,10 +20,12 @@ #include +namespace { + const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126); -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -107,7 +109,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -115,9 +117,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -126,7 +128,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -138,9 +140,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -162,10 +164,10 @@ typedef struct TestInfo int isNextafter; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. 
-} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -267,196 +269,10 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); - test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; - test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - 
} - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - 
free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -986,3 +802,189 @@ exit: if (overflow) free(overflow); return error; } + +} // anonymous namespace + +int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); + test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; + test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + 
} + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + 
free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 69e620aa..2fcc8c10 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -108,7 +110,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -116,9 +118,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -127,7 +129,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -139,9 +141,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -159,10 +161,10 @@ typedef struct TestInfo int ftz; // non-zero if running in flush to zero mode // no special values -} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -272,201 +274,18 @@ static const double specialValues[] = { +0.0, }; -static size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static const int specialValuesInt[] = { +const int specialValuesInt[] = { 0, 1, 2, 3, 1022, 1023, 1024, INT_MIN, INT_MAX, -1, -2, -3, -1022, -1023, -11024, -INT_MAX, }; -static constexpr size_t specialValuesIntCount = - sizeof(specialValuesInt) / sizeof(specialValuesInt[0]); - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - cl_int maxErrorVal2 = 0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, 
sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - cl_buffer_region region2 = { i * test_info.subBufferSize - * 
sizeof(cl_int), - test_info.subBufferSize * sizeof(cl_int) }; - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // 
Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} +constexpr size_t specialValuesIntCount = + sizeof(specialValuesInt) / sizeof(specialValuesInt[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -744,3 +563,187 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + cl_int maxErrorVal2 = 0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + 
test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + cl_buffer_region region2 = { i * test_info.subBufferSize + * sizeof(cl_int), + test_info.subBufferSize * sizeof(cl_int) }; + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + 
gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index e65a9aaf..e1538e3c 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -106,7 +108,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -114,9 +116,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -125,7 +127,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -137,9 +139,9 @@ typedef struct ThreadInfo // to 0. 
MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -157,10 +159,10 @@ typedef struct TestInfo int ftz; // non-zero if running in flush to zero mode // no special values -} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -262,204 +264,20 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static const int specialValuesInt[] = { +const int specialValuesInt[] = { 0, 1, 2, 3, 126, 127, 128, 0x02000001, 0x04000001, 1465264071, 1488522147, -1, -2, -3, -126, -127, -128, -0x02000001, -0x04000001, -1465264071, -1488522147, }; -static size_t specialValuesIntCount = - sizeof(specialValuesInt) / sizeof(specialValuesInt[0]); - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - cl_int maxErrorVal2 = 0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = 
gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - cl_buffer_region region2 = { i * test_info.subBufferSize - * sizeof(cl_int), - test_info.subBufferSize * sizeof(cl_int) }; - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - 
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - 
clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} +constexpr size_t specialValuesIntCount = + sizeof(specialValuesInt) / sizeof(specialValuesInt[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -738,3 +556,188 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + cl_int maxErrorVal2 = 0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + cl_buffer_region region2 = { i * test_info.subBufferSize + * sizeof(cl_int), + test_info.subBufferSize * sizeof(cl_int) }; + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion2, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + 
test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %d}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + 
clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 21e76c85..605a3144 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -20,9 +20,11 @@ #include -static int BuildKernel(const char *operator_symbol, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) +namespace { + +int BuildKernel(const char *operator_symbol, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -108,7 +110,7 @@ static int BuildKernel(const char *operator_symbol, int vectorSize, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -116,9 +118,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -127,7 +129,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -139,9 +141,9 @@ typedef struct ThreadInfo // to 0. 
MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -161,10 +163,10 @@ typedef struct TestInfo // otherwise. // no special fields -} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -274,192 +276,10 @@ static const double specialValues[] = { +0.0, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, - bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL 
== test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -793,3 +613,185 @@ static cl_int 
Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, + bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + 
test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index ccaef604..8448af54 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ 
b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -20,9 +20,11 @@ #include -static int BuildKernel(const char *operator_symbol, int vectorSize, - cl_uint kernel_count, cl_kernel *k, cl_program *p, - bool relaxedMode) +namespace { + +int BuildKernel(const char *operator_symbol, int vectorSize, + cl_uint kernel_count, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -106,7 +108,7 @@ static int BuildKernel(const char *operator_symbol, int vectorSize, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -114,9 +116,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -125,7 +127,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread @@ -137,9 +139,9 @@ typedef struct ThreadInfo // to 0. MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -159,10 +161,10 @@ typedef struct TestInfo // otherwise. 
// no special fields -} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -264,194 +266,10 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, - bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_READ_WRITE, 
CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < 
gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -923,3 +741,187 @@ exit: if (overflow) free(overflow); return error; } + +} // anonymous namespace + +int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, + bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_READ_WRITE, 
CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < 
gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index 14f41092..43dc1d30 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -115,16 +117,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -132,7 +134,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -typedef struct ComputeReferenceInfoD_ +struct ComputeReferenceInfoD { const double *x; const double *y; @@ -141,9 +143,9 @@ typedef struct ComputeReferenceInfoD_ long double (*f_ffpI)(long double, long double, int *); cl_uint lim; cl_uint count; -} ComputeReferenceInfoD; +}; -static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) +cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) { ComputeReferenceInfoD *cri = (ComputeReferenceInfoD *)userInfo; cl_uint lim = cri->lim; @@ -165,6 +167,8 @@ static cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) return CL_SUCCESS; } +} // anonymous namespace + int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 5ef44b6e..83ceeaab 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -113,16 +115,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo 
{ cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -130,7 +132,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -typedef struct ComputeReferenceInfoF_ +struct ComputeReferenceInfoF { const float *x; const float *y; @@ -139,9 +141,9 @@ typedef struct ComputeReferenceInfoF_ double (*f_ffpI)(double, double, int *); cl_uint lim; cl_uint count; -} ComputeReferenceInfoF; +}; -static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) +cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) { ComputeReferenceInfoF *cri = (ComputeReferenceInfoF *)userInfo; cl_uint lim = cri->lim; @@ -161,6 +163,8 @@ static cl_int ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) return CL_SUCCESS; } +} // anonymous namespace + int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/function_list.cpp b/test_conformance/math_brute_force/function_list.cpp index 3edbb485..91736285 100644 --- a/test_conformance/math_brute_force/function_list.cpp +++ b/test_conformance/math_brute_force/function_list.cpp @@ -53,6 +53,7 @@ STRINGIFY(_name), _operator, { NULL }, { NULL }, { NULL }, _ulp, _ulp, \ _embedded_ulp, INFINITY, INFINITY, _rmode, RELAXED_OFF, _type \ } + #define unaryF NULL #define i_unaryF NULL #define unaryF_u NULL diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index 4383fa8b..d09e14c1 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ 
b/test_conformance/math_brute_force/i_unary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -100,16 +102,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -117,6 +119,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index c803aa32..89b566d9 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -98,16 +100,16 @@ static int BuildKernel(const char *name, int vectorSize, 
cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -115,6 +117,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index d09915f6..11281261 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -107,7 +109,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -115,9 +117,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -126,16 +128,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -150,11 +152,10 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode - -} TestInfo; +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -264,174 +265,10 @@ static const double specialValues[] = { +0.0, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step 
= (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (size_t i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint 
thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -735,3 +572,167 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (size_t i = 0; i < test_info.threadCount; i++) + { + 
cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index c530cdaf..6475e4bb 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", 
sizeNames[vectorSize], @@ -105,7 +107,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -113,9 +115,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -124,16 +126,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem inBuf2; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread MTdata d; cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -148,11 +150,10 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. 
cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode - -} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -254,175 +255,10 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - 
vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -724,3 +560,168 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = 
GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + 
CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + free_mtdata(test_info.tinfo[i].d); + clReleaseMemObject(test_info.tinfo[i].inBuf); + clReleaseMemObject(test_info.tinfo[i].inBuf2); + for (auto j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 00e65a2c..860e4596 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -101,7 +103,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -109,9 +111,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -120,14 +122,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -142,160 +144,9 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode +}; -} TestInfo; - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - 
for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -506,3 +357,153 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) return CL_SUCCESS; } + +} // anonymous namespace + +int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = 
getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to 
create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 3c1717ac..58a2a954 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int 
BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -100,7 +102,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -108,9 +110,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -119,14 +121,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -141,161 +143,9 @@ typedef struct TestInfo cl_uint step; // step between each chunk and the next. 
cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode +}; -} TestInfo; - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - 
test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j 
< gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -521,3 +371,154 @@ exit: return ret; } + +} // anonymous namespace + +int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific 
data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp index a32cd5a8..8e88f9f6 100644 --- a/test_conformance/math_brute_force/mad_double.cpp +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -113,16 +115,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } 
-typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -130,6 +132,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp index 095a22ff..0552ba4b 100644 --- a/test_conformance/math_brute_force/mad_float.cpp +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -111,16 +113,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -128,6 +130,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index 606fdc5a..8af136ac 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -23,8 +23,10 @@ #define CORRECTLY_ROUNDED 0 #define FLUSHED 1 -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -116,16 +118,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -134,7 +136,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -202,9 +204,11 @@ static const double specialValues[] = { +0.0, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); +} // anonymous namespace + int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) { diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index e52c0a0f..c69083ad 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -23,8 +23,10 @@ #define CORRECTLY_ROUNDED 0 #define FLUSHED 1 -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -114,16 +116,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -132,7 +134,7 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -210,9 +212,11 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); +} // anonymous namespace + int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index f6fa3264..dcd21884 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -101,7 +103,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -109,9 +111,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -120,16 +122,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -151,174 +153,9 @@ typedef struct TestInfo float half_sin_cos_tan_limit; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. 
-} TestInfo; - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = f->double_ulps; - test_info.ftz = f->ftz || gForceFTZ; - test_info.relaxedMode = relaxedMode; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * 
sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ %a", maxError, maxErrorVal); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < 
test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} +}; -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -547,3 +384,168 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) return CL_SUCCESS; } + +} // anonymous namespace + +int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = f->double_ulps; + test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel 
*)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); + goto exit; + } + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ %a", maxError, maxErrorVal); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 17edc58d..f176fb95 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, 
cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -99,7 +101,7 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; @@ -107,9 +109,9 @@ typedef struct BuildKernelInfo cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -118,16 +120,16 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info @@ -149,200 +151,9 @@ typedef struct TestInfo float half_sin_cos_tan_limit; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. 
-} TestInfo; - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0); - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < 
test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. 
(%d)\n", error); - goto exit; - } - } - - // Check for special cases for unary float - test_info.isRangeLimited = 0; - test_info.half_sin_cos_tan_limit = 0; - if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos")) - { - test_info.isRangeLimited = 1; - test_info.half_sin_cos_tan_limit = 1.0f - + test_info.ulps - * (FLT_EPSILON / 2.0f); // out of range results from finite - // inputs must be in [-1,1] - } - else if (0 == strcmp(f->name, "half_tan")) - { - test_info.isRangeLimited = 1; - test_info.half_sin_cos_tan_limit = - INFINITY; // out of range resut from finite inputs must be numeric - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting || skipTestingRelaxed) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - if (skipTestingRelaxed) - { - vlog(" (rlx skip correctness testing)\n"); - goto exit; - } - - vlog("\t%8.2f @ %a", maxError, maxErrorVal); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} +}; -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { const TestInfo *job = (const TestInfo *)data; size_t buffer_elements = job->subBufferSize; @@ -725,3 +536,194 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) return CL_SUCCESS; } + +} // anonymous namespace + +int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + int skipTestingRelaxed = (relaxedMode && strcmp(f->name, "tan") == 0); + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + memset(&test_info, 0, sizeof(test_info)); + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? 
f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + size_t array_size = test_info.threadCount * sizeof(cl_kernel); + test_info.k[i] = (cl_kernel *)malloc(array_size); + if (NULL == test_info.k[i]) + { + vlog_error("Error: Unable to allocate storage for kernels!\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.k[i], 0, array_size); + } + test_info.tinfo = + (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); + if (NULL == test_info.tinfo) + { + vlog_error( + "Error: Unable to allocate storage for thread specific data.\n"); + error = CL_OUT_OF_HOST_MEMORY; + goto exit; + } + memset(test_info.tinfo, 0, + test_info.threadCount * sizeof(*test_info.tinfo)); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + 
if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + } + + // Check for special cases for unary float + test_info.isRangeLimited = 0; + test_info.half_sin_cos_tan_limit = 0; + if (0 == strcmp(f->name, "half_sin") || 0 == strcmp(f->name, "half_cos")) + { + test_info.isRangeLimited = 1; + test_info.half_sin_cos_tan_limit = 1.0f + + test_info.ulps + * (FLT_EPSILON / 2.0f); // out of range results from finite + // inputs must be in [-1,1] + } + else if (0 == strcmp(f->name, "half_tan")) + { + test_info.isRangeLimited = 1; + test_info.half_sin_cos_tan_limit = + INFINITY; // out of range resut from finite inputs must be numeric + } + + // Init the kernels + { + BuildKernelInfo build_info = { + gMinVectorSizeIndex, test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, relaxedMode + }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting || skipTestingRelaxed) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + if (skipTestingRelaxed) + { + vlog(" (rlx skip correctness testing)\n"); + goto exit; + } + + vlog("\t%8.2f @ %a", maxError, maxErrorVal); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + clReleaseProgram(test_info.programs[i]); + if (test_info.k[i]) + { + for (cl_uint j = 0; j < test_info.threadCount; j++) + clReleaseKernel(test_info.k[i][j]); + + free(test_info.k[i]); + } + } + if (test_info.tinfo) + { + for (cl_uint i = 0; i < 
test_info.threadCount; i++) + { + clReleaseMemObject(test_info.tinfo[i].inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(test_info.tinfo[i].outBuf[j]); + clReleaseCommandQueue(test_info.tinfo[i].tQueue); + } + + free(test_info.tinfo); + } + + return error; +} diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index 71dd4f44..8757fbc4 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -107,16 +109,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -124,6 +126,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index 4a375ce3..a54bd024 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -105,16 +107,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -122,6 +124,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index 14d1fb99..9ed77dce 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -108,16 +110,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -125,12 +127,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -static cl_ulong abs_cl_long(cl_long i) +cl_ulong abs_cl_long(cl_long i) { cl_long mask = i >> 63; return (i ^ mask) - mask; } +} // anonymous namespace + int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index 23b0d707..d048220b 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -21,8 +21,10 @@ #include #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -106,16 +108,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -123,12 +125,14 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -static cl_ulong abs_cl_long(cl_long i) +cl_ulong abs_cl_long(cl_long i) { cl_long mask = i >> 63; return (i ^ mask) - mask; } +} // anonymous namespace + int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index 3c5f99da..9478d0bc 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -102,16 +104,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -119,11 +121,13 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } -static cl_ulong random64(MTdata d) +cl_ulong random64(MTdata d) { return (cl_ulong)genrand_int32(d) | ((cl_ulong)genrand_int32(d) << 32); } +} // anonymous namespace + int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) { int error; diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index 44c5af47..848a9bac 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -20,8 +20,10 @@ #include -static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, - cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, + bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -99,16 +101,16 @@ static int BuildKernel(const char *name, int vectorSize, cl_kernel *k, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -typedef struct BuildKernelInfo +struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-} BuildKernelInfo; +}; -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; @@ -116,6 +118,8 @@ static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) info->programs + i, info->relaxedMode); } +} // anonymous namespace + int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) { int error; -- cgit v1.2.3 From a08cacc67334788e8135964ca8edce373017ac55 Mon Sep 17 00:00:00 2001 From: ouakheli <53617630+ouakheli@users.noreply.github.com> Date: Mon, 24 May 2021 11:31:37 +0100 Subject: Fix clang-format-9 install (#1261) --- .github/workflows/presubmit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 0c1778eb..8ef7e663 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -33,7 +33,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Install packages - run: sudo apt install -y clang-format + run: sudo apt install -y clang-format clang-format-9 - uses: actions/checkout@v2 with: fetch-depth: 0 -- cgit v1.2.3 From ed839ebf10c5b7334ac16b0fe13e324f3b47799a Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Mon, 24 May 2021 16:34:54 +0100 Subject: Avoid manual memory management (#1260) * Avoid manual memory management Prefer std::vector over malloc and free. This will allow removing goto statements by leveraging RAII. Use appropriate type (bool) to store overflow predicates and allocate std::vector of appropriate sizes: before this change the allocation was unnecessary bigger than required. No longer attempt to catch "out of host memory" issues, given that in such situation it is generally not possible to cleanly report an error. Rely on std::bad_alloc exception to report such issues. 
Introduce a new header for common code in the math_brute_force component. It is currently complementary to utility.h and is expected to hold cleaned up content extracted from future refactoring operations. List all headers as source in CMake for better compatibility with IDEs. Signed-off-by: Marco Antognini * Remove manual or unnecessary memset In order to use non-POD types as fields of TestInfo, memset must be replaced with a compatible zero-initialisation. Remove an unnecessary memset in MakeKernels. Signed-off-by: Marco Antognini --- test_conformance/math_brute_force/CMakeLists.txt | 6 ++ .../math_brute_force/binary_double.cpp | 76 ++++++++------------ test_conformance/math_brute_force/binary_float.cpp | 80 ++++++++-------------- .../math_brute_force/binary_i_double.cpp | 76 ++++++++------------ .../math_brute_force/binary_i_float.cpp | 76 ++++++++------------ .../math_brute_force/binary_operator_double.cpp | 76 ++++++++------------ .../math_brute_force/binary_operator_float.cpp | 80 ++++++++-------------- test_conformance/math_brute_force/common.h | 27 ++++++++ .../math_brute_force/macro_binary_double.cpp | 78 ++++++++------------- .../math_brute_force/macro_binary_float.cpp | 76 ++++++++------------ .../math_brute_force/macro_unary_double.cpp | 72 +++++++------------ .../math_brute_force/macro_unary_float.cpp | 72 +++++++------------ test_conformance/math_brute_force/main.cpp | 8 +-- test_conformance/math_brute_force/unary_double.cpp | 72 +++++++------------ test_conformance/math_brute_force/unary_float.cpp | 72 +++++++------------ 15 files changed, 366 insertions(+), 581 deletions(-) create mode 100644 test_conformance/math_brute_force/common.h diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index d8dfc403..28d2716f 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -9,7 +9,9 @@ set(${MODULE_NAME}_SOURCES 
binary_operator_float.cpp binary_two_results_i_double.cpp binary_two_results_i_float.cpp + common.h function_list.cpp + function_list.h i_unary_double.cpp i_unary_float.cpp macro_binary_double.cpp @@ -20,9 +22,12 @@ set(${MODULE_NAME}_SOURCES mad_float.cpp main.cpp reference_math.cpp + reference_math.h sleep.cpp + sleep.h ternary_double.cpp ternary_float.cpp + test_functions.h unary_double.cpp unary_float.cpp unary_two_results_double.cpp @@ -32,6 +37,7 @@ set(${MODULE_NAME}_SOURCES unary_u_double.cpp unary_u_float.cpp utility.cpp + utility.h ) include(../CMakeCommon.txt) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 9c6b59b4..a2b7d28b 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -115,7 +116,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -126,7 +127,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -149,11 +151,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -284,11 +289,11 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; @@ -647,7 +652,7 @@ exit: int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -656,7 +661,6 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -685,27 +689,10 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + 
test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -802,27 +789,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 9c7081dc..97712ee8 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -113,7 +114,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -124,7 +125,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -147,11 +149,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -274,18 +279,18 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; bool relaxedMode = job->relaxedMode; float ulps = getAllowedUlpError(job->f, relaxedMode); MTdata d = tinfo->d; cl_int error; - cl_uchar *overflow = (cl_uchar *)malloc(buffer_size); + std::vector overflow(buffer_elements, false); const char *name = job->f->name; int isFDim = job->isFDim; int skipNanInf = job->skipNanInf; @@ -447,7 +452,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) vlog_error("Error: clFinish failed! err: %d\n", error); goto exit; } - free(overflow); return CL_SUCCESS; } @@ -799,7 +803,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } exit: - if (overflow) free(overflow); return error; } @@ -807,7 +810,7 @@ exit: int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -816,7 +819,6 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -846,27 +848,10 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel 
*)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -963,27 +948,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 
2fcc8c10..f15c21ed 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -114,7 +115,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. @@ -125,7 +126,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -148,11 +150,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -287,11 +292,11 @@ constexpr size_t specialValuesIntCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; @@ -568,7 +573,7 @@ exit: int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -577,7 +582,6 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -602,27 +606,10 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + 
test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -722,27 +709,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index e1538e3c..9e27b007 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -112,7 +113,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -123,7 +124,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -146,11 +148,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -279,11 +284,11 @@ constexpr size_t specialValuesIntCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; float ulps = job->ulps; @@ -561,7 +566,7 @@ exit: int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -570,7 +575,6 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -596,27 +600,10 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + 
test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -716,27 +703,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 605a3144..c407fdaa 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -114,7 +115,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -125,7 +126,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->operator_symbol, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -148,11 +150,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -281,11 +286,11 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; @@ -619,7 +624,7 @@ exit: int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -628,7 +633,6 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -653,27 +657,10 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + 
test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -770,27 +757,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 8448af54..7fbb07c2 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -112,7 +113,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -123,7 +124,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->operator_symbol, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -146,11 +148,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -271,18 +276,18 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; bool relaxedMode = job->relaxedMode; float ulps = getAllowedUlpError(job->f, relaxedMode); MTdata d = tinfo->d; cl_int error; - cl_uchar *overflow = (cl_uchar *)malloc(buffer_size); + std::vector overflow(buffer_elements, false); const char *name = job->f->name; cl_uint *t = 0; cl_float *r = 0; @@ -445,7 +450,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gSkipCorrectnessTesting) { - free(overflow); return CL_SUCCESS; } @@ -738,7 +742,6 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } exit: - if (overflow) free(overflow); return error; } @@ -747,7 +750,7 @@ exit: int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -756,7 +759,6 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -783,27 +785,10 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to 
allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -900,27 +885,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h new file mode 100644 index 00000000..3eafb6de --- /dev/null +++ b/test_conformance/math_brute_force/common.h @@ -0,0 +1,27 @@ +// 
+// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef COMMON_H +#define COMMON_H + +#include "utility.h" + +#include +#include + +// Array of thread-specific kernels for each vector size. +using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; + +#endif /* COMMON_H */ diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index 11281261..6db6aa56 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -113,7 +114,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -124,7 +125,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -142,11 +144,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -270,11 +275,11 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); dptr dfunc = job->f->dfunc; int ftz = job->ftz; MTdata d = tinfo->d; @@ -577,13 +582,12 @@ exit: int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -607,28 +611,11 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (size_t i = 0; i < test_info.threadCount; i++) + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + for (cl_uint i = 0; i < 
test_info.threadCount; i++) { cl_buffer_region region = { i * test_info.subBufferSize * sizeof(cl_double), @@ -711,27 +698,20 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 6475e4bb..d6d5c8eb 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -111,7 +112,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -122,7 +123,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -140,11 +142,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -260,11 +265,11 @@ constexpr size_t specialValuesCount = cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; MTdata d = tinfo->d; @@ -565,13 +570,12 @@ exit: int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -596,27 +600,10 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -700,27 +687,20 
@@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + free_mtdata(threadInfo.d); + clReleaseMemObject(threadInfo.inBuf); + clReleaseMemObject(threadInfo.inBuf2); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 860e4596..1978c185 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -107,7 +108,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -118,7 +119,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -134,11 +136,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -148,12 +153,12 @@ struct TestInfo cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint scale = job->scale; cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); dptr dfunc = job->f->dfunc; int ftz = job->ftz; cl_int error; @@ -362,13 +367,12 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -392,27 +396,10 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { 
cl_buffer_region region = { @@ -484,25 +471,18 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 58a2a954..ece5e9b6 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -106,7 +107,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -117,7 +118,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -133,11 +135,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -147,12 +152,12 @@ struct TestInfo cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint scale = job->scale; cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; cl_int error = CL_SUCCESS; @@ -376,13 +381,12 @@ exit: int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -407,27 +411,10 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -499,25 +486,18 
@@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index e52f2f0a..6691f462 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -1055,8 +1055,6 @@ int MakeKernels(const char **c, cl_uint count, const char *name, cl_uint kernel_count, cl_kernel *k, cl_program *p, bool relaxedMode) { - int error = 0; - cl_uint i; char options[200] = ""; if (gForceFTZ) @@ -1074,7 +1072,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name, strcat(options, " -cl-fast-relaxed-math"); } - error = + int error = create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options); if (error != CL_SUCCESS) { @@ -1082,9 +1080,7 @@ int MakeKernels(const char **c, cl_uint count, const char *name, return error; } - - memset(k, 0, kernel_count * sizeof(*k)); - for (i = 0; i < kernel_count; i++) + for (cl_uint i = 0; i < kernel_count; i++) { k[i] = clCreateKernel(*p, name, &error); if (NULL == k[i] || error) diff --git a/test_conformance/math_brute_force/unary_double.cpp 
b/test_conformance/math_brute_force/unary_double.cpp index dcd21884..2d455047 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -107,7 +108,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. @@ -118,7 +119,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -136,11 +138,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -157,12 +162,12 @@ struct TestInfo cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint scale = job->scale; cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); float ulps = job->ulps; dptr func = job->f->dfunc; cl_int error; @@ -389,14 +394,13 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -422,27 +426,10 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, 
ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -526,25 +513,18 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index f176fb95..83d27b0b 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -105,7 +106,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_uint kernel_count; - cl_kernel **kernels; + KernelMatrix &kernels; cl_program *programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
@@ -116,7 +117,8 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + info->kernels[i].data(), info->programs + i, + info->relaxedMode); } // Thread specific data for a worker thread @@ -134,11 +136,14 @@ struct TestInfo size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. 
@@ -155,12 +160,12 @@ struct TestInfo cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint scale = job->scale; cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; const char *fname = job->f->name; bool relaxedMode = job->relaxedMode; @@ -541,7 +546,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) { - TestInfo test_info; + TestInfo test_info{}; cl_int error; float maxError = 0.0f; double maxErrorVal = 0.0; @@ -550,7 +555,6 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); // Init test_info - memset(&test_info, 0, sizeof(test_info)); test_info.threadCount = GetThreadCount(); test_info.subBufferSize = BUFFER_SIZE / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); @@ -576,27 +580,10 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) // every thread for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; + test_info.k[i].resize(test_info.threadCount, nullptr); } - memset(test_info.tinfo, 0, - 
test_info.threadCount * sizeof(*test_info.tinfo)); + + test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -704,25 +691,18 @@ exit: for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) + for (auto &kernel : test_info.k[i]) { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); + clReleaseKernel(kernel); } } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - clReleaseMemObject(test_info.tinfo[i].inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - free(test_info.tinfo); + for (auto &threadInfo : test_info.tinfo) + { + clReleaseMemObject(threadInfo.inBuf); + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + clReleaseMemObject(threadInfo.outBuf[j]); + clReleaseCommandQueue(threadInfo.tQueue); } return error; -- cgit v1.2.3 From 0876ea10be4783340683c9970c5899ac8ed1d6ab Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Mon, 24 May 2021 16:59:03 +0100 Subject: Ignore padding bits in clCopyImage/clFillImage testing (#1184) The CL_UNORM_SHORT_555 and CL_UNORM_INT_101010 formats contain padding bits which need to be ignored in clCopyImage and clFillImage testing. For clFillImage tests, padding was not ignored for the CL_UNORM_SHORT_555 format, and was ignored for CL_UNORM_INT_101010 by modifying actual and reference data. For clCopyImage tests, padding was not ignored, both for CL_UNORM_SHORT_555 and for CL_UNORM_INT_101010. Fix this by adding a new compare_scanlines() function, which is used for both of these formats, and does not modify the actual or reference data. 
Signed-off-by: Stuart Brady --- test_common/harness/imageHelpers.cpp | 41 ++++++++++++++++++++++ test_common/harness/imageHelpers.h | 3 ++ .../images/clCopyImage/test_copy_generic.cpp | 23 ++++++------ .../images/clFillImage/test_fill_generic.cpp | 28 ++++++--------- 4 files changed, 66 insertions(+), 29 deletions(-) diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index d1754653..314709f8 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -479,6 +479,47 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel, } } +size_t compare_scanlines(const image_descriptor *imageInfo, const char *aPtr, + const char *bPtr) +{ + size_t pixel_size = get_pixel_size(imageInfo->format); + size_t column; + + for (column = 0; column < imageInfo->width; column++) + { + switch (imageInfo->format->image_channel_data_type) + { + // If the data type is 101010, then ignore bits 31 and 32 when + // comparing the row + case CL_UNORM_INT_101010: { + cl_uint aPixel = *(cl_uint *)aPtr; + cl_uint bPixel = *(cl_uint *)bPtr; + if ((aPixel & 0x3fffffff) != (bPixel & 0x3fffffff)) + return column; + } + break; + + // If the data type is 555, ignore bit 15 when comparing the row + case CL_UNORM_SHORT_555: { + cl_ushort aPixel = *(cl_ushort *)aPtr; + cl_ushort bPixel = *(cl_ushort *)bPtr; + if ((aPixel & 0x7fff) != (bPixel & 0x7fff)) return column; + } + break; + + default: + if (memcmp(aPtr, bPtr, pixel_size) != 0) return column; + break; + } + + aPtr += pixel_size; + bPtr += pixel_size; + } + + // If we didn't find a difference, return the width of the image + return column; +} + int random_log_in_range(int minV, int maxV, MTdata d) { double v = log2(((double)genrand_int32(d) / (double)0xffffffff) + 1); diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h index 848ec655..e728a939 100644 --- a/test_common/harness/imageHelpers.h +++ 
b/test_common/harness/imageHelpers.h @@ -139,6 +139,9 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel, image_descriptor *imageInfo, size_t y, size_t thirdDim); +size_t compare_scanlines(const image_descriptor *imageInfo, const char *aPtr, + const char *bPtr); + void get_max_sizes(size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3], size_t maxWidth, size_t maxHeight, size_t maxDepth, size_t maxArraySize, diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp index 026916e8..bd935e7f 100644 --- a/test_conformance/images/clCopyImage/test_copy_generic.cpp +++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp @@ -547,18 +547,19 @@ int test_copy_image_generic( cl_context context, cl_command_queue queue, image_d { if( memcmp( sourcePtr, destPtr, scanlineSize ) != 0 ) { - // Find the first missing pixel + // Find the first differing pixel size_t pixel_size = get_pixel_size( dstImageInfo->format ); - size_t where = 0; - for( where = 0; where < dstImageInfo->width; where++ ) - if( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) ) - break; - - print_first_pixel_difference_error( - where, sourcePtr + pixel_size * where, - destPtr + pixel_size * where, dstImageInfo, y, - dstImageInfo->depth); - return -1; + size_t where = + compare_scanlines(dstImageInfo, sourcePtr, destPtr); + + if (where < dstImageInfo->width) + { + print_first_pixel_difference_error( + where, sourcePtr + pixel_size * where, + destPtr + pixel_size * where, dstImageInfo, y, + dstImageInfo->depth); + return -1; + } } sourcePtr += rowPitch; if((dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY || dstImageInfo->type == CL_MEM_OBJECT_IMAGE1D)) diff --git a/test_conformance/images/clFillImage/test_fill_generic.cpp b/test_conformance/images/clFillImage/test_fill_generic.cpp index 59bf24ad..6cd6beb0 100644 --- 
a/test_conformance/images/clFillImage/test_fill_generic.cpp +++ b/test_conformance/images/clFillImage/test_fill_generic.cpp @@ -468,27 +468,19 @@ int test_fill_image_generic( cl_context context, cl_command_queue queue, image_d { for ( size_t y = 0; y < secondDim; y++ ) { - // If the data type is 101010 ignore bits 31 and 32 when comparing the row - if (imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010) { - for (size_t w=0;w!=scanlineSize/4;++w) { - ((cl_uint*)sourcePtr)[w] &= 0x3FFFFFFF; - ((cl_uint*)destPtr)[w] &= 0x3FFFFFFF; - } - } - if (memcmp( sourcePtr, destPtr, scanlineSize ) != 0) { - // Find the first missing pixel + // Find the first differing pixel size_t pixel_size = get_pixel_size( imageInfo->format ); - size_t where = 0; - for ( where = 0; where < imageInfo->width; where++ ) - if ( memcmp( sourcePtr + pixel_size * where, destPtr + pixel_size * where, pixel_size) ) - break; - - print_first_pixel_difference_error( - where, sourcePtr + pixel_size * where, - destPtr + pixel_size * where, imageInfo, y, thirdDim); - return -1; + size_t where = compare_scanlines(imageInfo, sourcePtr, destPtr); + + if (where < imageInfo->width) + { + print_first_pixel_difference_error( + where, sourcePtr + pixel_size * where, + destPtr + pixel_size * where, imageInfo, y, thirdDim); + return -1; + } } total_matched += scanlineSize; -- cgit v1.2.3 From bd3135dd016aae7ae6454725ef3761d132a38926 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Thu, 27 May 2021 10:05:27 +0200 Subject: Extend list of known extensions (#1262) --- .../compiler/test_compiler_defines_for_extensions.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 483adac9..a1d8d8bd 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ 
b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -49,7 +49,7 @@ const char *known_extensions[] = { "cl_khr_subgroup_shuffle", "cl_khr_subgroup_shuffle_relative", "cl_khr_subgroup_clustered_reduce", - + "cl_khr_extended_bit_ops", // API-only extensions after this point. If you add above here, modify // first_API_extension below. "cl_khr_icd", @@ -71,10 +71,13 @@ const char *known_extensions[] = { "cl_khr_spirv_no_integer_wrap_decoration", "cl_khr_extended_versioning", "cl_khr_device_uuid", + "cl_khr_pci_bus_info", + "cl_khr_suggested_local_work_size", + "cl_khr_spirv_linkonce_odr", }; -size_t num_known_extensions = sizeof(known_extensions)/sizeof(char*); -size_t first_API_extension = 27; +size_t num_known_extensions = sizeof(known_extensions) / sizeof(char *); +size_t first_API_extension = 28; const char *known_embedded_extensions[] = { "cles_khr_int64", -- cgit v1.2.3 From 315998511abe3959be21962a696911b43d4d5f59 Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Thu, 27 May 2021 09:06:13 +0100 Subject: Address data race in ThreadPool (#1265) ThreadSanitizer detects some data race in ThreadPool. They stem from inappropriate usage of volatile which are replaced with std::atomic variables in this patch. This patch focuses on data races identified while running the math_brute_force component. For example, it doesn't fully remove usage of ThreadPool_AtomicAdd from other components of the CTS. Furthermore, thread leaks, most likely because threads are not joined, are not addressed. 
Signed-off-by: Marco Antognini --- test_common/harness/ThreadPool.cpp | 44 ++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/test_common/harness/ThreadPool.cpp b/test_common/harness/ThreadPool.cpp index 5dae1b4a..62798045 100644 --- a/test_common/harness/ThreadPool.cpp +++ b/test_common/harness/ThreadPool.cpp @@ -22,6 +22,8 @@ #if defined(__APPLE__) || defined(__linux__) || defined(_WIN32) // or any other POSIX system +#include + #if defined(_WIN32) #include #if defined(_MSC_VER) @@ -241,7 +243,7 @@ pthread_cond_t cond_var; // Condition variable state. How many iterations on the function left to run, // set to CL_INT_MAX to cause worker threads to exit. Note: this value might // go negative. -volatile cl_int gRunCount = 0; +std::atomic gRunCount{ 0 }; // State that only changes when the threadpool is not working. volatile TPFuncPtr gFunc_ptr = NULL; @@ -261,19 +263,20 @@ pthread_cond_t caller_cond_var; // # of threads intended to be running. Running threads will decrement this // as they discover they've run out of work to do. -volatile cl_int gRunning = 0; +std::atomic gRunning{ 0 }; // The total number of threads launched. -volatile cl_int gThreadCount = 0; +std::atomic gThreadCount{ 0 }; + #ifdef _WIN32 void ThreadPool_WorkerFunc(void *p) #else void *ThreadPool_WorkerFunc(void *p) #endif { - cl_uint threadID = ThreadPool_AtomicAdd((volatile cl_int *)p, 1); - cl_int item = ThreadPool_AtomicAdd(&gRunCount, -1); - // log_info( "ThreadPool_WorkerFunc start: gRunning = %d\n", gRunning ); + auto &tid = *static_cast *>(p); + cl_uint threadID = tid++; + cl_int item = gRunCount--; while (MAX_COUNT > item) { @@ -282,8 +285,6 @@ void *ThreadPool_WorkerFunc(void *p) // check for more work to do if (0 >= item) { - // log_info("Thread %d has run out of work.\n", threadID); - // No work to do. 
Attempt to block waiting for work #if defined(_WIN32) EnterCriticalSection(cond_lock); @@ -298,9 +299,7 @@ void *ThreadPool_WorkerFunc(void *p) } #endif // !_WIN32 - cl_int remaining = ThreadPool_AtomicAdd(&gRunning, -1); - // log_info("ThreadPool_WorkerFunc: gRunning = %d\n", - // remaining - 1); + cl_int remaining = gRunning--; if (1 == remaining) { // last thread out signal the main thread to wake up #if defined(_WIN32) @@ -350,7 +349,7 @@ void *ThreadPool_WorkerFunc(void *p) #endif // !_WIN32 // try again to get a valid item id - item = ThreadPool_AtomicAdd(&gRunCount, -1); + item = gRunCount--; if (MAX_COUNT <= item) // exit if we are done { #if defined(_WIN32) @@ -362,8 +361,7 @@ void *ThreadPool_WorkerFunc(void *p) } } - ThreadPool_AtomicAdd(&gRunning, 1); - // log_info("Thread %d has found work.\n", threadID); + gRunning++; #if defined(_WIN32) LeaveCriticalSection(cond_lock); @@ -447,12 +445,12 @@ void *ThreadPool_WorkerFunc(void *p) } // get the next item - item = ThreadPool_AtomicAdd(&gRunCount, -1); + item = gRunCount--; } exit: log_info("ThreadPool: thread %d exiting.\n", threadID); - ThreadPool_AtomicAdd(&gThreadCount, -1); + gThreadCount--; #if !defined(_WIN32) return NULL; #endif @@ -487,7 +485,7 @@ void ThreadPool_Init(void) { cl_int i; int err; - volatile cl_uint threadID = 0; + std::atomic threadID{ 0 }; // Check for manual override of multithreading code. We add this for better // debuggability. 
@@ -624,7 +622,7 @@ void ThreadPool_Init(void) } #endif // !_WIN32 - gRunning = gThreadCount; + gRunning = gThreadCount.load(); // init threads for (i = 0; i < gThreadCount; i++) { @@ -688,10 +686,6 @@ static BOOL CALLBACK _ThreadPool_Init(_PINIT_ONCE InitOnce, PVOID Parameter, void ThreadPool_Exit(void) { -#ifndef _WIN32 - int err; -#endif - int count; gRunCount = CL_INT_MAX; #if defined(__GNUC__) @@ -705,13 +699,13 @@ void ThreadPool_Exit(void) #endif // spin waiting for threads to die - for (count = 0; 0 != gThreadCount && count < 1000; count++) + for (int count = 0; 0 != gThreadCount && count < 1000; count++) { #if defined(_WIN32) _WakeAllConditionVariable(cond_var); Sleep(1); #else // !_WIN32 - if ((err = pthread_cond_broadcast(&cond_var))) + if (int err = pthread_cond_broadcast(&cond_var)) { log_error("Error %d from pthread_cond_broadcast. Unable to wake up " "work threads. ThreadPool_Exit failed.\n", @@ -725,7 +719,7 @@ void ThreadPool_Exit(void) if (gThreadCount) log_error("Error: Thread pool timed out after 1 second with %d threads " "still active.\n", - gThreadCount); + gThreadCount.load()); else log_info("Thread pool exited in a orderly fashion.\n"); } -- cgit v1.2.3 From 76ace61314e061fbf0f8a058dab19fa7e04df937 Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Wed, 9 Jun 2021 11:08:08 +0100 Subject: Fix leaks in callSingleTestFunction (#1224) The context and queue were not released when the test is not supported in offline mode or the queue couldn't be created. Inline test_missing_support_offline_cmpiler_ret macro, remove dead parameter of check_functions_for_offline_compiler and slightly refactor callSingleTestFunction to address leaks. 
Signed-off-by: Marco Antognini --- test_common/harness/errorHelpers.cpp | 15 +++++++-------- test_common/harness/errorHelpers.h | 18 +----------------- test_common/harness/testHarness.cpp | 12 +++++++++--- 3 files changed, 17 insertions(+), 28 deletions(-) diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp index 3ddbc37b..ea928bc3 100644 --- a/test_common/harness/errorHelpers.cpp +++ b/test_common/harness/errorHelpers.cpp @@ -21,6 +21,7 @@ #include "errorHelpers.h" #include "parseParameters.h" +#include "testHarness.h" #include @@ -690,21 +691,19 @@ const char *subtests_to_skip_with_offline_compiler[] = { "library_function" }; -int check_functions_for_offline_compiler(const char *subtestname, - cl_device_id device) +bool check_functions_for_offline_compiler(const char *subtestname) { if (gCompilationMode != kOnline) { size_t nNotRequiredWithOfflineCompiler = - sizeof(subtests_to_skip_with_offline_compiler) / sizeof(char *); - size_t i; - for (i = 0; i < nNotRequiredWithOfflineCompiler; ++i) + ARRAY_SIZE(subtests_to_skip_with_offline_compiler); + for (size_t i = 0; i < nNotRequiredWithOfflineCompiler; ++i) { if (!strcmp(subtestname, subtests_to_skip_with_offline_compiler[i])) { - return 1; + return false; } } } - return 0; -} \ No newline at end of file + return true; +} diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h index c7f49e3d..d59bc78d 100644 --- a/test_common/harness/errorHelpers.h +++ b/test_common/harness/errorHelpers.h @@ -92,21 +92,6 @@ static int vlog_win32(const char *format, ...); "the device version! 
(from %s:%d)\n", \ msg, __FILE__, __LINE__); -#define test_missing_support_offline_cmpiler(errCode, msg) \ - test_missing_support_offline_cmpiler_ret(errCode, msg, errCode) -// this macro should always return CL_SUCCESS, but print the skip message on -// test not supported with offline compiler -#define test_missing_support_offline_cmpiler_ret(errCode, msg, retValue) \ - { \ - if (errCode != CL_SUCCESS) \ - { \ - log_info("INFO: Subtest %s tests is not supported in offline " \ - "compiler execution path! (from %s:%d)\n", \ - msg, __FILE__, __LINE__); \ - return TEST_SKIP; \ - } \ - } - // expected error code vs. what we got #define test_failure_error(errCode, expectedErrCode, msg) \ test_failure_error_ret(errCode, expectedErrCode, msg, \ @@ -181,8 +166,7 @@ extern const char *GetAddressModeName(cl_addressing_mode mode); extern const char *GetQueuePropertyName(cl_command_queue_properties properties); extern const char *GetDeviceTypeName(cl_device_type type); -int check_functions_for_offline_compiler(const char *subtestname, - cl_device_id device); +bool check_functions_for_offline_compiler(const char *subtestname); cl_int OutputBuildLogs(cl_program program, cl_uint num_devices, cl_device_id *device_list); diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index 1aec3d07..b3863918 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -783,6 +783,14 @@ test_status callSingleTestFunction(test_definition test, return TEST_SKIP; } + if (!check_functions_for_offline_compiler(test.name)) + { + log_info("Subtest %s tests is not supported in offline compiler " + "execution path!\n", + test.name); + return TEST_SKIP; + } + /* Create a context to work with, unless we're told not to */ if (!forceNoContextCreation) { @@ -812,14 +820,12 @@ test_status callSingleTestFunction(test_definition test, if (queue == NULL) { print_error(error, "Unable to create testing command queue"); + clReleaseContext(context); 
return TEST_FAIL; } } /* Run the test and print the result */ - error = check_functions_for_offline_compiler(test.name, deviceToUse); - test_missing_support_offline_cmpiler(error, test.name); - if (test.func == NULL) { // Skip unimplemented test, can happen when all of the tests are -- cgit v1.2.3 From 277d029608ed0f7fdb0823f010d653dd0169c82c Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Fri, 11 Jun 2021 09:42:20 +0100 Subject: Run spirv-val for SPIR-V offline compilation (#1108) The common --disable-spirv-validation option has been added to disable this functionality. Signed-off-by: Stuart Brady --- test_common/harness/kernelHelpers.cpp | 22 +++++++++++++++++++++- test_common/harness/parseParameters.cpp | 32 +++++++++++++++++++++++++++++++- test_common/harness/parseParameters.h | 2 ++ 3 files changed, 54 insertions(+), 2 deletions(-) diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp index aaf0d689..18f51cbe 100644 --- a/test_common/harness/kernelHelpers.cpp +++ b/test_common/harness/kernelHelpers.cpp @@ -530,7 +530,7 @@ static int get_offline_compiler_output( sourceFilename, outputFilename); if (error != CL_SUCCESS) return error; - // read output file + // open output file for reading ifs.open(outputFilename.c_str(), std::ios::binary); if (!ifs.good()) { @@ -540,6 +540,26 @@ static int get_offline_compiler_output( } } } + + if (compilationMode == kSpir_v && !gDisableSPIRVValidation) + { + std::string runString = gSPIRVValidator + " " + outputFilename; + + int returnCode = system(runString.c_str()); + if (returnCode == -1) + { + log_error("Error: failed to invoke SPIR-V validator\n"); + return CL_COMPILE_PROGRAM_FAILURE; + } + else if (returnCode != 0) + { + log_error( + "Failed to validate SPIR-V file %s: system() returned 0x%x\n", + outputFilename.c_str(), returnCode); + return CL_COMPILE_PROGRAM_FAILURE; + } + } + return CL_SUCCESS; } diff --git a/test_common/harness/parseParameters.cpp 
b/test_common/harness/parseParameters.cpp index b2ab5b02..e946d744 100644 --- a/test_common/harness/parseParameters.cpp +++ b/test_common/harness/parseParameters.cpp @@ -28,11 +28,14 @@ using namespace std; #define DEFAULT_COMPILATION_PROGRAM "cl_offline_compiler" +#define DEFAULT_SPIRV_VALIDATOR "spirv-val" CompilationMode gCompilationMode = kOnline; CompilationCacheMode gCompilationCacheMode = kCacheModeCompileIfAbsent; std::string gCompilationCachePath = "."; std::string gCompilationProgram = DEFAULT_COMPILATION_PROGRAM; +bool gDisableSPIRVValidation = false; +std::string gSPIRVValidator = DEFAULT_SPIRV_VALIDATOR; void helpInfo() { @@ -62,7 +65,14 @@ For offline compilation (binary and spir-v modes) only: Path for offline compiler output and CL source --compilation-program Program to use for offline compilation, defaults to: - )" DEFAULT_COMPILATION_PROGRAM "\n\n"); + )" DEFAULT_COMPILATION_PROGRAM R"( + +For spir-v mode only: + --disable-spirv-validation + Disable validation of SPIR-V using the SPIR-V validator + --spirv-validator + Path for SPIR-V validator, defaults to )" DEFAULT_SPIRV_VALIDATOR "\n" + "\n"); } int parseCustomParam(int argc, const char *argv[], const char *ignore) @@ -198,6 +208,26 @@ int parseCustomParam(int argc, const char *argv[], const char *ignore) return -1; } } + else if (!strcmp(argv[i], "--disable-spirv-validation")) + { + delArg++; + gDisableSPIRVValidation = true; + } + else if (!strcmp(argv[i], "--spirv-validator")) + { + delArg++; + if ((i + 1) < argc) + { + delArg++; + gSPIRVValidator = argv[i + 1]; + } + else + { + log_error("Program argument for --spirv-validator was not " + "specified.\n"); + return -1; + } + } // cleaning parameters from argv tab for (int j = i; j < argc - delArg; j++) argv[j] = argv[j + delArg]; diff --git a/test_common/harness/parseParameters.h b/test_common/harness/parseParameters.h index b0f8328a..437e12f9 100644 --- a/test_common/harness/parseParameters.h +++ b/test_common/harness/parseParameters.h @@ 
-38,6 +38,8 @@ extern CompilationMode gCompilationMode; extern CompilationCacheMode gCompilationCacheMode; extern std::string gCompilationCachePath; extern std::string gCompilationProgram; +extern bool gDisableSPIRVValidation; +extern std::string gSPIRVValidator; extern int parseCustomParam(int argc, const char *argv[], const char *ignore = 0); -- cgit v1.2.3 From 80a4a833be9bc390574801dc5a47b02a579bf47b Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Fri, 11 Jun 2021 09:44:16 +0100 Subject: Minor fixes for CL_UNORM_SHORT_565, CL_UNORM_SHORT_555 (#1129) * Minor fixes for CL_UNORM_SHORT_565, CL_UNORM_SHORT_555 * Fix verification for undefined bit * Relax current infinitely precision requirement for these formats and move check in common function. * Add proper debug output. Signed-off-by: John Kesapides * Minor Formating fix. Signed-off-by: John Kesapides --- .../images/kernel_read_write/test_common.cpp | 37 +++++++++- .../images/kernel_read_write/test_common.h | 5 ++ .../images/kernel_read_write/test_write_1D.cpp | 70 +++++++++++++++---- .../kernel_read_write/test_write_1D_array.cpp | 71 +++++++++++++++---- .../kernel_read_write/test_write_2D_array.cpp | 80 ++++++++++++++++++---- .../images/kernel_read_write/test_write_3D.cpp | 80 ++++++++++++++++++---- .../images/kernel_read_write/test_write_image.cpp | 72 +++++++++++++++---- 7 files changed, 339 insertions(+), 76 deletions(-) diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index e76710b5..375ee587 100644 --- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -1543,4 +1543,39 @@ int test_read_image(cl_context context, cl_command_queue queue, } return numTries != MAX_TRIES || numClamped != MAX_CLAMPED; -} \ No newline at end of file +} + +void filter_undefined_bits(image_descriptor *imageInfo, char 
*resultPtr) +{ + // mask off the top bit (bit 15) if the image format is (CL_UNORM_SHORT_555, + // CL_RGB). (Note: OpenCL says: the top bit is undefined meaning it can be + // either 0 or 1.) + if (imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555) + { + cl_ushort *temp = (cl_ushort *)resultPtr; + temp[0] &= 0x7fff; + } +} + +int filter_rounding_errors(int forceCorrectlyRoundedWrites, + image_descriptor *imageInfo, float *errors) +{ + // We are allowed 0.6 absolute error vs. infinitely precise for some + // normalized formats + if (0 == forceCorrectlyRoundedWrites + && (imageInfo->format->image_channel_data_type == CL_UNORM_INT8 + || imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 + || imageInfo->format->image_channel_data_type == CL_UNORM_INT16 + || imageInfo->format->image_channel_data_type == CL_SNORM_INT8 + || imageInfo->format->image_channel_data_type == CL_SNORM_INT16 + || imageInfo->format->image_channel_data_type == CL_UNORM_SHORT_555 + || imageInfo->format->image_channel_data_type + == CL_UNORM_SHORT_565)) + { + if (!(fabsf(errors[0]) > 0.6f) && !(fabsf(errors[1]) > 0.6f) + && !(fabsf(errors[2]) > 0.6f) && !(fabsf(errors[3]) > 0.6f)) + return 0; + } + + return 1; +} diff --git a/test_conformance/images/kernel_read_write/test_common.h b/test_conformance/images/kernel_read_write/test_common.h index e7ecbe0b..656c41f4 100644 --- a/test_conformance/images/kernel_read_write/test_common.h +++ b/test_conformance/images/kernel_read_write/test_common.h @@ -229,3 +229,8 @@ int determine_validation_error_offset( } return 0; } + + +extern int filter_rounding_errors(int forceCorrectlyRoundedWrites, + image_descriptor *imageInfo, float *errors); +extern void filter_undefined_bits(image_descriptor *imageInfo, char *resultPtr); diff --git a/test_conformance/images/kernel_read_write/test_write_1D.cpp b/test_conformance/images/kernel_read_write/test_write_1D.cpp index 41983edf..1556a76a 100644 --- 
a/test_conformance/images/kernel_read_write/test_write_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -395,6 +396,8 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que } else { + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -403,21 +406,8 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, errors); if( failure ) { @@ -458,6 +448,56 @@ int test_write_image_1D( cl_device_id device, cl_context context, cl_command_que log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: diff --git a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp index c771704c..e9aa8d2a 100644 --- 
a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -415,6 +416,9 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, pixelSize ) != 0 ) { @@ -423,21 +427,8 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, errors); if( failure ) { @@ -478,6 +469,56 @@ int test_write_image_1D_array( cl_device_id device, cl_context context, cl_comma log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: diff --git a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp index 08a7a803..5bca7124 100644 --- 
a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -438,6 +439,9 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -446,21 +450,9 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, + errors); if( failure ) { @@ -501,6 +493,64 @@ int test_write_image_2D_array( cl_device_id device, cl_context context, cl_comma log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: diff --git a/test_conformance/images/kernel_read_write/test_write_3D.cpp b/test_conformance/images/kernel_read_write/test_write_3D.cpp index 5cc96bb4..d9a69627 100644 --- 
a/test_conformance/images/kernel_read_write/test_write_3D.cpp +++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -445,6 +446,9 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -453,21 +457,9 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, + errors); if( failure ) { @@ -508,6 +500,64 @@ int test_write_image_3D( cl_device_id device, cl_context context, cl_command_que log_error( " Actual: 0x%2.2x 0x%2.2x 0x%2.2x 0x%2.2x\n", ((cl_uchar*)resultPtr)[0], ((cl_uchar*)resultPtr)[1], ((cl_uchar*)resultPtr)[2], ((cl_uchar*)resultPtr)[3] ); log_error( " Error: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x " + "Actual: 0x%2.2x \n", + ref_value[0], + test_value[0]); + + log_error( + " Expected: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error( + " Actual: 0x%2.2x 0x%2.2x " + "0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error( + " Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } case CL_UNORM_INT16: case CL_SNORM_INT16: case CL_UNSIGNED_INT16: diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp index e40e80d6..9cc9698c 100644 --- 
a/test_conformance/images/kernel_read_write/test_write_image.cpp +++ b/test_conformance/images/kernel_read_write/test_write_image.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "../testBase.h" +#include "test_common.h" #if !defined(_WIN32) #include @@ -477,6 +478,9 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue } else { + + filter_undefined_bits(imageInfo, resultPtr); + // Exact result passes every time if( memcmp( resultBuffer, resultPtr, get_pixel_size( imageInfo->format ) ) != 0 ) { @@ -485,21 +489,8 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue float errors[4] = {NAN, NAN, NAN, NAN}; pack_image_pixel_error( (float *)imagePtr, imageInfo->format, resultBuffer, errors ); - // We are allowed 0.6 absolute error vs. infinitely precise for some normalized formats - if( 0 == forceCorrectlyRoundedWrites && - ( - imageInfo->format->image_channel_data_type == CL_UNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT_101010 || - imageInfo->format->image_channel_data_type == CL_UNORM_INT16 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT8 || - imageInfo->format->image_channel_data_type == CL_SNORM_INT16 - )) - { - if( ! (fabsf( errors[0] ) > 0.6f) && ! (fabsf( errors[1] ) > 0.6f) && - ! (fabsf( errors[2] ) > 0.6f) && ! 
(fabsf( errors[3] ) > 0.6f) ) - failure = 0; - } - + failure = filter_rounding_errors( + forceCorrectlyRoundedWrites, imageInfo, errors); if( failure ) { @@ -577,6 +568,57 @@ int test_write_image( cl_device_id device, cl_context context, cl_command_queue log_error( " Actual: %a %a %a %a\n", ((cl_float*)resultPtr)[0], ((cl_float*)resultPtr)[1], ((cl_float*)resultPtr)[2], ((cl_float*)resultPtr)[3] ); log_error( " Ulps: %f %f %f %f\n", errors[0], errors[1], errors[2], errors[3] ); break; + case CL_UNORM_SHORT_565: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x3F, + (ref_value[0] >> 11) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x3F, + (test_value[0] >> 11) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } + + case CL_UNORM_SHORT_555: { + cl_uint *ref_value = + (cl_uint *)resultBuffer; + cl_uint *test_value = + (cl_uint *)resultPtr; + + log_error(" Expected: 0x%2.2x Actual: " + "0x%2.2x \n", + ref_value[0], test_value[0]); + + log_error(" Expected: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + ref_value[0] & 0x1F, + (ref_value[0] >> 5) & 0x1F, + (ref_value[0] >> 10) & 0x1F); + log_error(" Actual: 0x%2.2x " + "0x%2.2x 0x%2.2x \n", + test_value[0] & 0x1F, + (test_value[0] >> 5) & 0x1F, + (test_value[0] >> 10) & 0x1F); + log_error(" Error: %f %f %f %f\n", + errors[0], errors[1], + errors[2]); + break; + } } float *v = (float *)(char *)imagePtr; -- cgit v1.2.3 From 69f0054001438078c11478546b855c06e07e1817 Mon Sep 17 00:00:00 2001 From: Marco Antognini Date: Thu, 17 Jun 2021 14:05:05 +0100 Subject: Fix copy and move semantics of wrapper classes (#1268) * Remove unnecessary code These custom equality operators are not 
necessary because of the conversion operators which already allow using the standard equality operators between two pointers. Signed-off-by: Marco Antognini * Fix copy and move semantics of wrapper classes Related to #465. The Wrapper classes are rewritten to properly handle copy and move semantics, while preserving the existing API and removing code duplication. Add error handling around clRelase* and clRetain*. Signed-off-by: Marco Antognini * Address build issue on 32-bit Windows Include linkage in RetainReleaseType function type. Signed-off-by: Marco Antognini --- test_common/harness/typeWrappers.h | 246 ++++++++++---------------- test_conformance/buffers/test_sub_buffers.cpp | 3 +- 2 files changed, 91 insertions(+), 158 deletions(-) diff --git a/test_common/harness/typeWrappers.h b/test_common/harness/typeWrappers.h index 9a58a9d2..50c7c938 100644 --- a/test_common/harness/typeWrappers.h +++ b/test_common/harness/typeWrappers.h @@ -16,122 +16,134 @@ #ifndef _typeWrappers_h #define _typeWrappers_h -#include -#include - #if !defined(_WIN32) #include #endif #include "compat.h" -#include #include "mt19937.h" #include "errorHelpers.h" #include "kernelHelpers.h" -/* cl_context wrapper */ +#include +#include -class clContextWrapper { -public: - clContextWrapper() { mContext = NULL; } - clContextWrapper(cl_context program) { mContext = program; } - ~clContextWrapper() - { - if (mContext != NULL) clReleaseContext(mContext); - } +namespace wrapper_details { + +// clRetain*() and clRelease*() functions share the same type. +template // T should be cl_context, cl_program, ... +using RetainReleaseType = cl_int CL_API_CALL(T); - clContextWrapper &operator=(const cl_context &rhs) +// A generic wrapper class that follows OpenCL retain/release semantics. +// +// This Wrapper class implement copy and move semantics, which makes it +// compatible with standard containers for example. +// +// Template parameters: +// - T is the cl_* type (e.g. cl_context, cl_program, ...) 
+// - Retain is the clRetain* function (e.g. clRetainContext, ...) +// - Release is the clRelease* function (e.g. clReleaseContext, ...) +template Retain, RetainReleaseType Release> +class Wrapper { + static_assert(std::is_pointer::value, "T should be a pointer type."); + T object = nullptr; + + void retain() { - mContext = rhs; - return *this; + if (!object) return; + + auto err = Retain(object); + if (err != CL_SUCCESS) + { + print_error(err, "clRetain*() failed"); + std::abort(); + } } - operator cl_context() const { return mContext; } - cl_context *operator&() { return &mContext; } + void release() + { + if (!object) return; - bool operator==(const cl_context &rhs) { return mContext == rhs; } + auto err = Release(object); + if (err != CL_SUCCESS) + { + print_error(err, "clRelease*() failed"); + std::abort(); + } + } -protected: - cl_context mContext; -}; +public: + Wrapper() = default; -/* cl_program wrapper */ + // On initialisation, assume the object has a refcount of one. + Wrapper(T object): object(object) {} -class clProgramWrapper { -public: - clProgramWrapper() { mProgram = NULL; } - clProgramWrapper(cl_program program) { mProgram = program; } - ~clProgramWrapper() + // On assignment, assume the object has a refcount of one. + Wrapper &operator=(T rhs) { - if (mProgram != NULL) clReleaseProgram(mProgram); + reset(rhs); + return *this; } - clProgramWrapper &operator=(const cl_program &rhs) + // Copy semantics, increase retain count. 
+ Wrapper(Wrapper const &w) { *this = w; } + Wrapper &operator=(Wrapper const &w) { - mProgram = rhs; + reset(w.object); + retain(); return *this; } - operator cl_program() const { return mProgram; } - - cl_program *operator&() { return &mProgram; } - bool operator==(const cl_program &rhs) { return mProgram == rhs; } - -protected: - cl_program mProgram; -}; - -/* cl_kernel wrapper */ - -class clKernelWrapper { -public: - clKernelWrapper() { mKernel = NULL; } - clKernelWrapper(cl_kernel kernel) { mKernel = kernel; } - ~clKernelWrapper() + // Move semantics, directly take ownership. + Wrapper(Wrapper &&w) { *this = std::move(w); } + Wrapper &operator=(Wrapper &&w) { - if (mKernel != NULL) clReleaseKernel(mKernel); + reset(w.object); + w.object = nullptr; + return *this; } - clKernelWrapper &operator=(const cl_kernel &rhs) + ~Wrapper() { reset(); } + + // Release the existing object, if any, and own the new one, if any. + void reset(T new_object = nullptr) { - mKernel = rhs; - return *this; + release(); + object = new_object; } - operator cl_kernel() const { return mKernel; } - cl_kernel *operator&() { return &mKernel; } + operator T() const { return object; } - bool operator==(const cl_kernel &rhs) { return mKernel == rhs; } - -protected: - cl_kernel mKernel; + // Ideally this function should not exist as it breaks encapsulation by + // allowing external mutation of the Wrapper internal state. However, too + // much code currently relies on this. For example, instead of using T* as + // output parameters, existing code can be updated to use Wrapper& instead. 
+ T *operator&() { return &object; } }; -/* cl_mem (stream) wrapper */ +} // namespace wrapper_details -class clMemWrapper { -public: - clMemWrapper() { mMem = NULL; } - clMemWrapper(cl_mem mem) { mMem = mem; } - ~clMemWrapper() - { - if (mMem != NULL) clReleaseMemObject(mMem); - } +using clContextWrapper = + wrapper_details::Wrapper; - clMemWrapper &operator=(const cl_mem &rhs) - { - mMem = rhs; - return *this; - } - operator cl_mem() const { return mMem; } +using clProgramWrapper = + wrapper_details::Wrapper; - cl_mem *operator&() { return &mMem; } +using clKernelWrapper = + wrapper_details::Wrapper; - bool operator==(const cl_mem &rhs) { return mMem == rhs; } +using clMemWrapper = + wrapper_details::Wrapper; -protected: - cl_mem mMem; -}; +using clCommandQueueWrapper = + wrapper_details::Wrapper; + +using clSamplerWrapper = + wrapper_details::Wrapper; + +using clEventWrapper = + wrapper_details::Wrapper; class clProtectedImage { public: @@ -183,92 +195,12 @@ public: cl_mem *operator&() { return ℑ } - bool operator==(const cl_mem &rhs) { return image == rhs; } - protected: void *backingStore; size_t backingStoreSize; cl_mem image; }; -/* cl_command_queue wrapper */ -class clCommandQueueWrapper { -public: - clCommandQueueWrapper() { mMem = NULL; } - clCommandQueueWrapper(cl_command_queue mem) { mMem = mem; } - ~clCommandQueueWrapper() - { - if (mMem != NULL) - { - clReleaseCommandQueue(mMem); - } - } - - clCommandQueueWrapper &operator=(const cl_command_queue &rhs) - { - mMem = rhs; - return *this; - } - operator cl_command_queue() const { return mMem; } - - cl_command_queue *operator&() { return &mMem; } - - bool operator==(const cl_command_queue &rhs) { return mMem == rhs; } - -protected: - cl_command_queue mMem; -}; - -/* cl_sampler wrapper */ -class clSamplerWrapper { -public: - clSamplerWrapper() { mMem = NULL; } - clSamplerWrapper(cl_sampler mem) { mMem = mem; } - ~clSamplerWrapper() - { - if (mMem != NULL) clReleaseSampler(mMem); - } - - clSamplerWrapper 
&operator=(const cl_sampler &rhs) - { - mMem = rhs; - return *this; - } - operator cl_sampler() const { return mMem; } - - cl_sampler *operator&() { return &mMem; } - - bool operator==(const cl_sampler &rhs) { return mMem == rhs; } - -protected: - cl_sampler mMem; -}; - -/* cl_event wrapper */ -class clEventWrapper { -public: - clEventWrapper() { mMem = NULL; } - clEventWrapper(cl_event mem) { mMem = mem; } - ~clEventWrapper() - { - if (mMem != NULL) clReleaseEvent(mMem); - } - - clEventWrapper &operator=(const cl_event &rhs) - { - mMem = rhs; - return *this; - } - operator cl_event() const { return mMem; } - - cl_event *operator&() { return &mMem; } - - bool operator==(const cl_event &rhs) { return mMem == rhs; } - -protected: - cl_event mMem; -}; - /* Generic protected memory buffer, for verifying access within bounds */ class clProtectedArray { public: diff --git a/test_conformance/buffers/test_sub_buffers.cpp b/test_conformance/buffers/test_sub_buffers.cpp index 3e50121a..691509fd 100644 --- a/test_conformance/buffers/test_sub_buffers.cpp +++ b/test_conformance/buffers/test_sub_buffers.cpp @@ -39,7 +39,8 @@ public: region.size = mSize; cl_int error; - mMem = clCreateSubBuffer( mParentBuffer, flags, CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error ); + reset(clCreateSubBuffer(mParentBuffer, flags, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error)); return error; } }; -- cgit v1.2.3 From 236cd73fa17ed0c280b7aa6cd8a3dd116c4e5d2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Fri, 2 Jul 2021 10:34:13 +0100 Subject: Use macOS 10 in CI (#1282) macOS jobs frequently fail. Since macos-11.0 support is considered experimental, move to macos-10, using macos-latest so we automatically move to 11 when stable. 
See https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners Signed-off-by: Kevin Petit --- .github/workflows/presubmit.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 8ef7e663..2aedc199 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -11,7 +11,7 @@ jobs: strategy: matrix: mainmatrix: [true] - os: [ubuntu-20.04, macos-11.0] + os: [ubuntu-20.04, macos-latest] include: - os: ubuntu-20.04 mainmatrix: true -- cgit v1.2.3 From 4a03bb79cb8fbd6012b02783e59565cce0b1f376 Mon Sep 17 00:00:00 2001 From: James Price Date: Mon, 5 Jul 2021 10:35:39 -0400 Subject: Fix double-release of memory objects (#1277) A recent update to the object wrapper classes (#1268) changed the behavior of assigning to a wrapper, whereby the wrapped object is now released upon assignment. A couple of tests were manually calling clReleaseMemObject and then assigning `nullptr` to the wrapper, resulting in the wrapper calling clReleaseMemObject on an object that had already been destroyed. 
--- test_conformance/api/test_mem_object_info.cpp | 7 ------- test_conformance/api/test_mem_objects.cpp | 7 +------ 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/test_conformance/api/test_mem_object_info.cpp b/test_conformance/api/test_mem_object_info.cpp index ccfeaafa..2afe0437 100644 --- a/test_conformance/api/test_mem_object_info.cpp +++ b/test_conformance/api/test_mem_object_info.cpp @@ -348,14 +348,7 @@ int test_get_buffer_info( cl_device_id deviceID, cl_context context, cl_command_ TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_ASSOCIATED_MEMOBJECT, origObj, (cl_mem)bufferObject, "associated mem object", "%p", void * ) TEST_MEM_OBJECT_PARAM( subBufferObject, CL_MEM_OFFSET, offset, (size_t)( addressAlign ), "offset", "%ld", size_t ) - - clReleaseMemObject( subBufferObject ); - subBufferObject = NULL; - } - - clReleaseMemObject( bufferObject ); - bufferObject = NULL; } return CL_SUCCESS; diff --git a/test_conformance/api/test_mem_objects.cpp b/test_conformance/api/test_mem_objects.cpp index c29613f9..f1a4e993 100644 --- a/test_conformance/api/test_mem_objects.cpp +++ b/test_conformance/api/test_mem_objects.cpp @@ -48,12 +48,7 @@ int test_mem_object_destructor_callback_single(clMemWrapper &memObject) test_error(error, "Unable to set destructor callback"); // Now release the buffer, which SHOULD call the callbacks - error = clReleaseMemObject(memObject); - test_error(error, "Unable to release test buffer"); - - // Note: since we manually released the mem wrapper, we need to set it to - // NULL to prevent a double-release - memObject = NULL; + memObject.reset(); // At this point, all three callbacks should have already been called int numErrors = 0; -- cgit v1.2.3 From 433974fd2810f91b093f10121adca64e1eefd789 Mon Sep 17 00:00:00 2001 From: BKoscielak Date: Tue, 13 Jul 2021 18:15:33 +0200 Subject: Fix check for image support in test_basic sizeof (#1269) --- test_conformance/basic/test_sizeof.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 
deletions(-) diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp index 66a6c563..6b1ddb56 100644 --- a/test_conformance/basic/test_sizeof.cpp +++ b/test_conformance/basic/test_sizeof.cpp @@ -292,11 +292,11 @@ int test_sizeof(cl_device_id device, cl_context context, cl_command_queue queue, continue; } - if( gIsEmbedded && - 0 == strcmp(other_types[i], "image3d_t") && - checkFor3DImageSupport( device ) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + if (0 == strcmp(other_types[i], "image3d_t") + && checkFor3DImageSupport(device) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { - log_info("\n3D images are not supported by this device. Skipping test.\t"); + log_info("\n3D images are not supported by this device. " + "Skipping test.\t"); continue; } -- cgit v1.2.3 From b500da5fbc97a2fc73ee39e30c00e7d759a11215 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 21 Jul 2021 00:48:48 -0700 Subject: add basic test for cl_khr_pci_bus_info (#1227) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add basic test for cl_khr_pci_bus_info * correctly use TEST_SKIPPED_ITSELF Co-authored-by: Kévin Petit * fix related usage of TEST_SKIPPED_ITSELF Co-authored-by: Kévin Petit --- test_conformance/computeinfo/CMakeLists.txt | 1 + test_conformance/computeinfo/device_uuid.cpp | 2 +- test_conformance/computeinfo/main.cpp | 3 +- test_conformance/computeinfo/pci_bus_info.cpp | 53 +++++++++++++++++++++++++++ 4 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 test_conformance/computeinfo/pci_bus_info.cpp diff --git a/test_conformance/computeinfo/CMakeLists.txt b/test_conformance/computeinfo/CMakeLists.txt index 207223a3..06f0599c 100644 --- a/test_conformance/computeinfo/CMakeLists.txt +++ b/test_conformance/computeinfo/CMakeLists.txt @@ -5,6 +5,7 @@ set(${MODULE_NAME}_SOURCES device_uuid.cpp extended_versioning.cpp conforming_version.cpp + pci_bus_info.cpp ) include(../CMakeCommon.txt) diff --git 
a/test_conformance/computeinfo/device_uuid.cpp b/test_conformance/computeinfo/device_uuid.cpp index 1ef9dad2..7f29d0b6 100644 --- a/test_conformance/computeinfo/device_uuid.cpp +++ b/test_conformance/computeinfo/device_uuid.cpp @@ -105,7 +105,7 @@ int test_device_uuid(cl_device_id deviceID, cl_context context, if (!is_extension_available(deviceID, "cl_khr_device_uuid")) { log_info("cl_khr_device_uuid not supported. Skipping test...\n"); - return 0; + return TEST_SKIPPED_ITSELF; } int total_errors = 0; diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp index 4860b445..d993655b 100644 --- a/test_conformance/computeinfo/main.cpp +++ b/test_conformance/computeinfo/main.cpp @@ -1421,15 +1421,16 @@ int test_computeinfo(cl_device_id deviceID, cl_context context, extern int test_extended_versioning(cl_device_id, cl_context, cl_command_queue, int); extern int test_device_uuid(cl_device_id, cl_context, cl_command_queue, int); - extern int test_conformance_version(cl_device_id, cl_context, cl_command_queue, int); +extern int test_pci_bus_info(cl_device_id, cl_context, cl_command_queue, int); test_definition test_list[] = { ADD_TEST(computeinfo), ADD_TEST(extended_versioning), ADD_TEST(device_uuid), ADD_TEST_VERSION(conformance_version, Version(3, 0)), + ADD_TEST(pci_bus_info), }; const int test_num = ARRAY_SIZE(test_list); diff --git a/test_conformance/computeinfo/pci_bus_info.cpp b/test_conformance/computeinfo/pci_bus_info.cpp new file mode 100644 index 00000000..cd62ca05 --- /dev/null +++ b/test_conformance/computeinfo/pci_bus_info.cpp @@ -0,0 +1,53 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness/compat.h" + +#include +#include + +#include "harness/testHarness.h" +#include "harness/deviceInfo.h" + +int test_pci_bus_info(cl_device_id deviceID, cl_context context, + cl_command_queue ignoreQueue, int num_elements) +{ + if (!is_extension_available(deviceID, "cl_khr_pci_bus_info")) + { + log_info("cl_khr_pci_bus_info not supported. Skipping test...\n"); + return TEST_SKIPPED_ITSELF; + } + + cl_int error; + + cl_device_pci_bus_info_khr info; + + size_t size_ret; + error = clGetDeviceInfo(deviceID, CL_DEVICE_PCI_BUS_INFO_KHR, 0, NULL, + &size_ret); + test_error(error, "Unable to query CL_DEVICE_PCI_BUS_INFO_KHR size"); + test_assert_error( + size_ret == sizeof(info), + "Query for CL_DEVICE_PCI_BUS_INFO_KHR returned an unexpected size"); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_PCI_BUS_INFO_KHR, sizeof(info), + &info, NULL); + test_error(error, "Unable to query CL_DEVICE_PCI_BUS_INFO_KHR"); + + log_info("\tPCI Bus Info: %04x:%02x:%02x.%x\n", info.pci_domain, + info.pci_bus, info.pci_device, info.pci_function); + + return TEST_PASS; +} -- cgit v1.2.3 From 12637114ac81d292861daf4bff2397a36581f712 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Wed, 21 Jul 2021 09:50:22 +0200 Subject: Fix double release of object in test_api and test_gl (#1287) * Fix clang format only * Fix double release of objects --- .../api/test_context_destructor_callback.cpp | 7 +- test_conformance/gl/test_buffers.cpp | 415 ++++++++------ test_conformance/gl/test_fence_sync.cpp | 624 ++++++++++++--------- 3 files changed, 586 insertions(+), 
460 deletions(-) diff --git a/test_conformance/api/test_context_destructor_callback.cpp b/test_conformance/api/test_context_destructor_callback.cpp index 1d73a3c4..d29d9039 100644 --- a/test_conformance/api/test_context_destructor_callback.cpp +++ b/test_conformance/api/test_context_destructor_callback.cpp @@ -52,12 +52,7 @@ int test_context_destructor_callback(cl_device_id deviceID, cl_context context, test_error(error, "Unable to set destructor callback"); // Now release the context, which SHOULD call the callbacks - error = clReleaseContext(localContext); - test_error(error, "Unable to release local context"); - - // Note: since we manually released the context, we need to set it to NULL - // to prevent a double-release - localContext = NULL; + localContext.reset(); // At this point, all three callbacks should have already been called int numErrors = 0; diff --git a/test_conformance/gl/test_buffers.cpp b/test_conformance/gl/test_buffers.cpp index 35f01ee6..c61610d0 100644 --- a/test_conformance/gl/test_buffers.cpp +++ b/test_conformance/gl/test_buffers.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -17,126 +17,126 @@ #include "harness/conversions.h" #include "harness/typeWrappers.h" -#if !defined (__APPLE__) - #include +#if !defined(__APPLE__) +#include #endif static const char *bufferKernelPattern = -"__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, __global %s%s *glDest )\n" -"{\n" -" int tid = get_global_id(0);\n" -" clDest[ tid ] = source[ tid ] + (%s%s)(1);\n" -" glDest[ tid ] = source[ tid ] + (%s%s)(2);\n" -"}\n"; - -#define TYPE_CASE( enum, type, range, offset ) \ - case enum: \ - { \ - cl_##type *ptr = (cl_##type *)outData; \ - for( i = 0; i < count; i++ ) \ - ptr[ i ] = (cl_##type)( ( genrand_int32(d) & range ) - offset ); \ - break; \ + "__kernel void sample_test( __global %s%s *source, __global %s%s *clDest, " + "__global %s%s *glDest )\n" + "{\n" + " int tid = get_global_id(0);\n" + " clDest[ tid ] = source[ tid ] + (%s%s)(1);\n" + " glDest[ tid ] = source[ tid ] + (%s%s)(2);\n" + "}\n"; + +#define TYPE_CASE(enum, type, range, offset) \ + case enum: { \ + cl_##type *ptr = (cl_##type *)outData; \ + for (i = 0; i < count; i++) \ + ptr[i] = (cl_##type)((genrand_int32(d) & range) - offset); \ + break; \ } -void gen_input_data( ExplicitType type, size_t count, MTdata d, void *outData ) +void gen_input_data(ExplicitType type, size_t count, MTdata d, void *outData) { size_t i; - switch( type ) + switch (type) { - case kBool: - { + case kBool: { bool *boolPtr = (bool *)outData; - for( i = 0; i < count; i++ ) + for (i = 0; i < count; i++) { - boolPtr[i] = ( genrand_int32(d) & 1 ) ? true : false; + boolPtr[i] = (genrand_int32(d) & 1) ? 
true : false; } break; } - TYPE_CASE( kChar, char, 250, 127 ) - TYPE_CASE( kUChar, uchar, 250, 0 ) - TYPE_CASE( kShort, short, 65530, 32767 ) - TYPE_CASE( kUShort, ushort, 65530, 0 ) - TYPE_CASE( kInt, int, 0x0fffffff, 0x70000000 ) - TYPE_CASE( kUInt, uint, 0x0fffffff, 0 ) + TYPE_CASE(kChar, char, 250, 127) + TYPE_CASE(kUChar, uchar, 250, 0) + TYPE_CASE(kShort, short, 65530, 32767) + TYPE_CASE(kUShort, ushort, 65530, 0) + TYPE_CASE(kInt, int, 0x0fffffff, 0x70000000) + TYPE_CASE(kUInt, uint, 0x0fffffff, 0) - case kLong: - { + case kLong: { cl_long *longPtr = (cl_long *)outData; - for( i = 0; i < count; i++ ) + for (i = 0; i < count; i++) { - longPtr[i] = (cl_long)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + longPtr[i] = (cl_long)genrand_int32(d) + | ((cl_ulong)genrand_int32(d) << 32); } break; } - case kULong: - { + case kULong: { cl_ulong *ulongPtr = (cl_ulong *)outData; - for( i = 0; i < count; i++ ) + for (i = 0; i < count; i++) { - ulongPtr[i] = (cl_ulong)genrand_int32(d) | ( (cl_ulong)genrand_int32(d) << 32 ); + ulongPtr[i] = (cl_ulong)genrand_int32(d) + | ((cl_ulong)genrand_int32(d) << 32); } break; } - case kFloat: - { + case kFloat: { cl_float *floatPtr = (float *)outData; - for( i = 0; i < count; i++ ) - floatPtr[i] = get_random_float( -100000.f, 100000.f, d ); + for (i = 0; i < count; i++) + floatPtr[i] = get_random_float(-100000.f, 100000.f, d); break; } default: - log_error( "ERROR: Invalid type passed in to generate_random_data!\n" ); + log_error( + "ERROR: Invalid type passed in to generate_random_data!\n"); break; } } -#define INC_CASE( enum, type ) \ - case enum: \ - { \ - cl_##type *src = (cl_##type *)inData; \ - cl_##type *dst = (cl_##type *)outData; \ - *dst = *src + 1; \ - break; \ +#define INC_CASE(enum, type) \ + case enum: { \ + cl_##type *src = (cl_##type *)inData; \ + cl_##type *dst = (cl_##type *)outData; \ + *dst = *src + 1; \ + break; \ } -void get_incremented_value( void *inData, void *outData, ExplicitType type ) +void 
get_incremented_value(void *inData, void *outData, ExplicitType type) { - switch( type ) + switch (type) { - INC_CASE( kChar, char ) - INC_CASE( kUChar, uchar ) - INC_CASE( kShort, short ) - INC_CASE( kUShort, ushort ) - INC_CASE( kInt, int ) - INC_CASE( kUInt, uint ) - INC_CASE( kLong, long ) - INC_CASE( kULong, ulong ) - INC_CASE( kFloat, float ) - default: - break; + INC_CASE(kChar, char) + INC_CASE(kUChar, uchar) + INC_CASE(kShort, short) + INC_CASE(kUShort, ushort) + INC_CASE(kInt, int) + INC_CASE(kUInt, uint) + INC_CASE(kLong, long) + INC_CASE(kULong, ulong) + INC_CASE(kFloat, float) + default: break; } } -int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType vecType, size_t vecSize, int numElements, int validate_only, MTdata d) +int test_buffer_kernel(cl_context context, cl_command_queue queue, + ExplicitType vecType, size_t vecSize, int numElements, + int validate_only, MTdata d) { clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper streams[ 3 ]; + clMemWrapper streams[3]; size_t dataSize = numElements * 16 * sizeof(cl_long); #if !(defined(_WIN32) && defined(_MSC_VER)) - cl_long inData[numElements * 16], outDataCL[numElements * 16], outDataGL[ numElements * 16 ]; + cl_long inData[numElements * 16], outDataCL[numElements * 16], + outDataGL[numElements * 16]; #else - cl_long* inData = (cl_long*)_malloca(dataSize); - cl_long* outDataCL = (cl_long*)_malloca(dataSize); - cl_long* outDataGL = (cl_long*)_malloca(dataSize); + cl_long *inData = (cl_long *)_malloca(dataSize); + cl_long *outDataCL = (cl_long *)_malloca(dataSize); + cl_long *outDataGL = (cl_long *)_malloca(dataSize); #endif glBufferWrapper inGLBuffer, outGLBuffer; - int i; + int i; size_t bufferSize; int error; @@ -146,210 +146,259 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType char sizeName[4]; /* Create the source */ - if( vecSize == 1 ) - sizeName[ 0 ] = 0; + if (vecSize == 1) + sizeName[0] = 0; else - sprintf( sizeName, 
"%d", (int)vecSize ); + sprintf(sizeName, "%d", (int)vecSize); - sprintf( kernelSource, bufferKernelPattern, get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName, - get_explicit_type_name( vecType ), sizeName ); + sprintf(kernelSource, bufferKernelPattern, get_explicit_type_name(vecType), + sizeName, get_explicit_type_name(vecType), sizeName, + get_explicit_type_name(vecType), sizeName, + get_explicit_type_name(vecType), sizeName, + get_explicit_type_name(vecType), sizeName); /* Create kernels */ programPtr = kernelSource; - if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&programPtr, "sample_test" ) ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programPtr, "sample_test")) { return -1; } - bufferSize = numElements * vecSize * get_explicit_type_size( vecType ); + bufferSize = numElements * vecSize * get_explicit_type_size(vecType); /* Generate some almost-random input data */ - gen_input_data( vecType, vecSize * numElements, d, inData ); - memset( outDataCL, 0, dataSize ); - memset( outDataGL, 0, dataSize ); + gen_input_data(vecType, vecSize * numElements, d, inData); + memset(outDataCL, 0, dataSize); + memset(outDataGL, 0, dataSize); /* Generate some GL buffers to go against */ - glGenBuffers( 1, &inGLBuffer ); - glGenBuffers( 1, &outGLBuffer ); + glGenBuffers(1, &inGLBuffer); + glGenBuffers(1, &outGLBuffer); - glBindBuffer( GL_ARRAY_BUFFER, inGLBuffer ); - glBufferData( GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW ); + glBindBuffer(GL_ARRAY_BUFFER, inGLBuffer); + glBufferData(GL_ARRAY_BUFFER, bufferSize, inData, GL_STATIC_DRAW); - // Note: we need to bind the output buffer, even though we don't care about its values yet, - // because CL needs it to get the buffer size - glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer ); - glBufferData( GL_ARRAY_BUFFER, bufferSize, 
outDataGL, GL_STATIC_DRAW ); + // Note: we need to bind the output buffer, even though we don't care about + // its values yet, because CL needs it to get the buffer size + glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer); + glBufferData(GL_ARRAY_BUFFER, bufferSize, outDataGL, GL_STATIC_DRAW); - glBindBuffer( GL_ARRAY_BUFFER, 0 ); + glBindBuffer(GL_ARRAY_BUFFER, 0); glFinish(); - /* Generate some streams. The first and last ones are GL, middle one just vanilla CL */ - streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_ONLY, inGLBuffer, &error ); - test_error( error, "Unable to create input GL buffer" ); + /* Generate some streams. The first and last ones are GL, middle one just + * vanilla CL */ + streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_ONLY, + inGLBuffer, &error); + test_error(error, "Unable to create input GL buffer"); - streams[ 1 ] = clCreateBuffer( context, CL_MEM_READ_WRITE, bufferSize, NULL, &error ); - test_error( error, "Unable to create output CL buffer" ); + streams[1] = + clCreateBuffer(context, CL_MEM_READ_WRITE, bufferSize, NULL, &error); + test_error(error, "Unable to create output CL buffer"); - streams[ 2 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_WRITE_ONLY, outGLBuffer, &error ); - test_error( error, "Unable to create output GL buffer" ); + streams[2] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_WRITE_ONLY, + outGLBuffer, &error); + test_error(error, "Unable to create output GL buffer"); - /* Validate the info */ - if (validate_only) { - int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) | - CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) ); - for(i=0;i<3;i++) + /* Validate the info */ + if (validate_only) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; - } + int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, + (GLuint)inGLBuffer, (GLenum)0, 0) + | CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, + 
(GLuint)outGLBuffer, (GLenum)0, 0)); + for (i = 0; i < 3; i++) + { + streams[i].reset(); + } - glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; - glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; + glDeleteBuffers(1, &inGLBuffer); + inGLBuffer = 0; + glDeleteBuffers(1, &outGLBuffer); + outGLBuffer = 0; - return result; - } + return result; + } /* Assign streams and execute */ - for( int i = 0; i < 3; i++ ) + for (int i = 0; i < 3; i++) { - error = clSetKernelArg( kernel, i, sizeof( streams[ i ] ), &streams[ i ] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(error, "Unable to set kernel arguments"); } - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL); - test_error( error, "Unable to acquire GL obejcts"); - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL); - test_error( error, "Unable to acquire GL obejcts"); + error = + (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL); + test_error(error, "Unable to acquire GL obejcts"); + error = + (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL); + test_error(error, "Unable to acquire GL obejcts"); /* Run the kernel */ threads[0] = numElements; - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); - - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 0 ], 0, NULL, NULL ); - test_error(error, "clEnqueueReleaseGLObjects failed"); - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &streams[ 2 ], 0, NULL, NULL ); - test_error(error, "clEnqueueReleaseGLObjects failed"); - - // Get the results from both CL and GL and make sure everything looks correct - error = 
clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, bufferSize, outDataCL, 0, NULL, NULL ); - test_error( error, "Unable to read output CL array!" ); - - glBindBuffer( GL_ARRAY_BUFFER, outGLBuffer ); - void *glMem = glMapBuffer( GL_ARRAY_BUFFER, GL_READ_ONLY ); - memcpy( outDataGL, glMem, bufferSize ); - glUnmapBuffer( GL_ARRAY_BUFFER ); - - char *inP = (char *)inData, *glP = (char *)outDataGL, *clP = (char *)outDataCL; + error = get_max_common_work_group_size(context, kernel, threads[0], + &localThreads[0]); + test_error(error, "Unable to get work group size to use"); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + + error = + (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[0], 0, NULL, NULL); + test_error(error, "clEnqueueReleaseGLObjects failed"); + error = + (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &streams[2], 0, NULL, NULL); + test_error(error, "clEnqueueReleaseGLObjects failed"); + + // Get the results from both CL and GL and make sure everything looks + // correct + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, bufferSize, + outDataCL, 0, NULL, NULL); + test_error(error, "Unable to read output CL array!"); + + glBindBuffer(GL_ARRAY_BUFFER, outGLBuffer); + void *glMem = glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY); + memcpy(outDataGL, glMem, bufferSize); + glUnmapBuffer(GL_ARRAY_BUFFER); + + char *inP = (char *)inData, *glP = (char *)outDataGL, + *clP = (char *)outDataCL; error = 0; - for( size_t i = 0; i < numElements * vecSize; i++ ) + for (size_t i = 0; i < numElements * vecSize; i++) { cl_long expectedCLValue, expectedGLValue; - get_incremented_value( inP, &expectedCLValue, vecType ); - get_incremented_value( &expectedCLValue, &expectedGLValue, vecType ); + get_incremented_value(inP, &expectedCLValue, vecType); + get_incremented_value(&expectedCLValue, &expectedGLValue, vecType); - if( memcmp( clP, &expectedCLValue, 
get_explicit_type_size( vecType ) ) != 0 ) + if (memcmp(clP, &expectedCLValue, get_explicit_type_size(vecType)) != 0) { - char scratch[ 64 ]; - log_error( "ERROR: Data sample %d from the CL output did not validate!\n", (int)i ); - log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\tExpected: %s\n", GetDataVectorString( &expectedCLValue, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\t Actual: %s\n", GetDataVectorString( clP, get_explicit_type_size( vecType ), 1, scratch ) ); + char scratch[64]; + log_error( + "ERROR: Data sample %d from the CL output did not validate!\n", + (int)i); + log_error("\t Input: %s\n", + GetDataVectorString(inP, get_explicit_type_size(vecType), + 1, scratch)); + log_error("\tExpected: %s\n", + GetDataVectorString(&expectedCLValue, + get_explicit_type_size(vecType), 1, + scratch)); + log_error("\t Actual: %s\n", + GetDataVectorString(clP, get_explicit_type_size(vecType), + 1, scratch)); error = -1; } - if( memcmp( glP, &expectedGLValue, get_explicit_type_size( vecType ) ) != 0 ) + if (memcmp(glP, &expectedGLValue, get_explicit_type_size(vecType)) != 0) { - char scratch[ 64 ]; - log_error( "ERROR: Data sample %d from the GL output did not validate!\n", (int)i ); - log_error( "\t Input: %s\n", GetDataVectorString( inP, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\tExpected: %s\n", GetDataVectorString( &expectedGLValue, get_explicit_type_size( vecType ), 1, scratch ) ); - log_error( "\t Actual: %s\n", GetDataVectorString( glP, get_explicit_type_size( vecType ), 1, scratch ) ); + char scratch[64]; + log_error( + "ERROR: Data sample %d from the GL output did not validate!\n", + (int)i); + log_error("\t Input: %s\n", + GetDataVectorString(inP, get_explicit_type_size(vecType), + 1, scratch)); + log_error("\tExpected: %s\n", + GetDataVectorString(&expectedGLValue, + get_explicit_type_size(vecType), 1, + scratch)); + log_error("\t Actual: 
%s\n", + GetDataVectorString(glP, get_explicit_type_size(vecType), + 1, scratch)); error = -1; } - if( error ) - return error; + if (error) return error; - inP += get_explicit_type_size( vecType ); - glP += get_explicit_type_size( vecType ); - clP += get_explicit_type_size( vecType ); + inP += get_explicit_type_size(vecType); + glP += get_explicit_type_size(vecType); + clP += get_explicit_type_size(vecType); } - for(i=0;i<3;i++) + for (i = 0; i < 3; i++) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; + streams[i].reset(); } - glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; - glDeleteBuffers(1, &outGLBuffer); outGLBuffer = 0; + glDeleteBuffers(1, &inGLBuffer); + inGLBuffer = 0; + glDeleteBuffers(1, &outGLBuffer); + outGLBuffer = 0; return 0; } -int test_buffers( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +int test_buffers(cl_device_id device, cl_context context, + cl_command_queue queue, int numElements) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; + ExplicitType vecType[] = { + kChar, kUChar, kShort, kUShort, kInt, + kUInt, kLong, kULong, kFloat, kNumExplicitTypes + }; unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; unsigned int index, typeIndex; int retVal = 0; RandomSeed seed(gRandomSeed); - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) { - for( index = 0; vecSizes[ index ] != 0; index++ ) + for (index = 0; vecSizes[index] != 0; index++) { // Test! 
- if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 0, seed) != 0 ) + if (test_buffer_kernel(context, queue, vecType[typeIndex], + vecSizes[index], numElements, 0, seed) + != 0) { - char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; - log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); + char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8", + "", "", "", "", "", "", "", "16" }; + log_error(" Buffer test %s%s FAILED\n", + get_explicit_type_name(vecType[typeIndex]), + sizeNames[vecSizes[index]]); retVal++; } } } return retVal; - } -int test_buffers_getinfo( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +int test_buffers_getinfo(cl_device_id device, cl_context context, + cl_command_queue queue, int numElements) { - ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kNumExplicitTypes }; + ExplicitType vecType[] = { + kChar, kUChar, kShort, kUShort, kInt, + kUInt, kLong, kULong, kFloat, kNumExplicitTypes + }; unsigned int vecSizes[] = { 1, 2, 4, 8, 16, 0 }; unsigned int index, typeIndex; int retVal = 0; - RandomSeed seed( gRandomSeed ); + RandomSeed seed(gRandomSeed); - for( typeIndex = 0; vecType[ typeIndex ] != kNumExplicitTypes; typeIndex++ ) + for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++) { - for( index = 0; vecSizes[ index ] != 0; index++ ) + for (index = 0; vecSizes[index] != 0; index++) { // Test! 
- if( test_buffer_kernel( context, queue, vecType[ typeIndex ], vecSizes[ index ], numElements, 1, seed ) != 0 ) + if (test_buffer_kernel(context, queue, vecType[typeIndex], + vecSizes[index], numElements, 1, seed) + != 0) { - char sizeNames[][ 4 ] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" }; - log_error( " Buffer test %s%s FAILED\n", get_explicit_type_name( vecType[ typeIndex ] ), sizeNames[ vecSizes[ index ] ] ); + char sizeNames[][4] = { "", "", "2", "", "4", "", "", "", "8", + "", "", "", "", "", "", "", "16" }; + log_error(" Buffer test %s%s FAILED\n", + get_explicit_type_name(vecType[typeIndex]), + sizeNames[vecSizes[index]]); retVal++; } } } return retVal; - } - - - diff --git a/test_conformance/gl/test_fence_sync.cpp b/test_conformance/gl/test_fence_sync.cpp index 00bf2cc9..35cc62de 100644 --- a/test_conformance/gl/test_fence_sync.cpp +++ b/test_conformance/gl/test_fence_sync.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -17,7 +17,7 @@ #include "gl/setup.h" #include "harness/genericThread.h" -#if defined( __APPLE__ ) +#if defined(__APPLE__) #include #else #include @@ -40,112 +40,121 @@ typedef struct __GLsync *GLsync; #define APIENTRY #endif -typedef GLsync (APIENTRY *glFenceSyncPtr)(GLenum condition,GLbitfield flags); +typedef GLsync(APIENTRY *glFenceSyncPtr)(GLenum condition, GLbitfield flags); glFenceSyncPtr glFenceSyncFunc; -typedef bool (APIENTRY *glIsSyncPtr)(GLsync sync); +typedef bool(APIENTRY *glIsSyncPtr)(GLsync sync); glIsSyncPtr glIsSyncFunc; -typedef void (APIENTRY *glDeleteSyncPtr)(GLsync sync); +typedef void(APIENTRY *glDeleteSyncPtr)(GLsync sync); glDeleteSyncPtr glDeleteSyncFunc; -typedef GLenum (APIENTRY *glClientWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); +typedef GLenum(APIENTRY *glClientWaitSyncPtr)(GLsync sync, GLbitfield flags, + GLuint64 timeout); glClientWaitSyncPtr glClientWaitSyncFunc; -typedef void (APIENTRY *glWaitSyncPtr)(GLsync sync,GLbitfield flags,GLuint64 timeout); +typedef void(APIENTRY *glWaitSyncPtr)(GLsync sync, GLbitfield flags, + GLuint64 timeout); glWaitSyncPtr glWaitSyncFunc; -typedef void (APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params); +typedef void(APIENTRY *glGetInteger64vPtr)(GLenum pname, GLint64 *params); glGetInteger64vPtr glGetInteger64vFunc; -typedef void (APIENTRY *glGetSyncivPtr)(GLsync sync,GLenum pname,GLsizei bufSize,GLsizei *length, - GLint *values); +typedef void(APIENTRY *glGetSyncivPtr)(GLsync sync, GLenum pname, + GLsizei bufSize, GLsizei *length, + GLint *values); glGetSyncivPtr glGetSyncivFunc; #define CHK_GL_ERR() printf("%s\n", gluErrorString(glGetError())) -static void InitSyncFns( void ) +static void InitSyncFns(void) { - glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress( "glFenceSync" ); - glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress( "glIsSync" ); - glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress( "glDeleteSync" ); - 
glClientWaitSyncFunc = (glClientWaitSyncPtr)glutGetProcAddress( "glClientWaitSync" ); - glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress( "glWaitSync" ); - glGetInteger64vFunc = (glGetInteger64vPtr)glutGetProcAddress( "glGetInteger64v" ); - glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress( "glGetSynciv" ); + glFenceSyncFunc = (glFenceSyncPtr)glutGetProcAddress("glFenceSync"); + glIsSyncFunc = (glIsSyncPtr)glutGetProcAddress("glIsSync"); + glDeleteSyncFunc = (glDeleteSyncPtr)glutGetProcAddress("glDeleteSync"); + glClientWaitSyncFunc = + (glClientWaitSyncPtr)glutGetProcAddress("glClientWaitSync"); + glWaitSyncFunc = (glWaitSyncPtr)glutGetProcAddress("glWaitSync"); + glGetInteger64vFunc = + (glGetInteger64vPtr)glutGetProcAddress("glGetInteger64v"); + glGetSyncivFunc = (glGetSyncivPtr)glutGetProcAddress("glGetSynciv"); } #ifndef GL_ARB_sync -#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 -#define GL_OBJECT_TYPE 0x9112 -#define GL_SYNC_CONDITION 0x9113 -#define GL_SYNC_STATUS 0x9114 -#define GL_SYNC_FLAGS 0x9115 +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 -#define GL_SYNC_FENCE 0x9116 +#define GL_SYNC_FENCE 0x9116 -#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 -#define GL_UNSIGNALED 0x9118 -#define GL_SIGNALED 0x9119 +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 -#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 -#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull -#define GL_ALREADY_SIGNALED 0x911A -#define GL_TIMEOUT_EXPIRED 0x911B -#define GL_CONDITION_SATISFIED 0x911C -#define GL_WAIT_FAILED 0x911D +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D #endif #define USING_ARB_sync 1 #endif -typedef cl_event 
(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( cl_context context, GLsync sync, cl_int *errCode_ret) ; +typedef cl_event(CL_API_CALL *clCreateEventFromGLsyncKHR_fn)( + cl_context context, GLsync sync, cl_int *errCode_ret); clCreateEventFromGLsyncKHR_fn clCreateEventFromGLsyncKHR_ptr; static const char *updateBuffersKernel[] = { - "__kernel void update( __global float4 * vertices, __global float4 *colors, int horizWrap, int rowIdx )\n" + "__kernel void update( __global float4 * vertices, __global float4 " + "*colors, int horizWrap, int rowIdx )\n" "{\n" " size_t tid = get_global_id(0);\n" "\n" " size_t xVal = ( tid & ( horizWrap - 1 ) );\n" " vertices[ tid * 2 + 0 ] = (float4)( xVal, rowIdx*16.f, 0.0f, 1.f );\n" - " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, 1.f );\n" + " vertices[ tid * 2 + 1 ] = (float4)( xVal, rowIdx*16.f + 4.0f, 0.0f, " + "1.f );\n" "\n" " int rowV = rowIdx + 1;\n" - " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 ) >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n" - " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, 1.0f, 1.0f, 1.0f );\n" + " colors[ tid * 2 + 0 ] = (float4)( ( rowV & 1 ) / 255.f, ( ( rowV & 2 " + ") >> 1 ) / 255.f, ( ( rowV & 4 ) >> 2 ) / 255.f, 1.f );\n" + " //colors[ tid * 2 + 0 ] = (float4)( (float)xVal/(float)horizWrap, " + "1.0f, 1.0f, 1.0f );\n" " colors[ tid * 2 + 1 ] = colors[ tid * 2 + 0 ];\n" - "}\n" }; - -//Passthrough VertexShader -static const char *vertexshader = -"#version 150\n" -"uniform mat4 projMatrix;\n" -"in vec4 inPosition;\n" -"in vec4 inColor;\n" -"out vec4 vertColor;\n" -"void main (void) {\n" -" gl_Position = projMatrix*inPosition;\n" -" vertColor = inColor;\n" -"}\n"; - -//Passthrough FragmentShader -static const char *fragmentshader = -"#version 150\n" -"in vec4 vertColor;\n" -"out vec4 outColor;\n" -"void main (void) {\n" -" outColor = vertColor;\n" -"}\n"; + "}\n" +}; + +// Passthrough VertexShader +static const char 
*vertexshader = "#version 150\n" + "uniform mat4 projMatrix;\n" + "in vec4 inPosition;\n" + "in vec4 inColor;\n" + "out vec4 vertColor;\n" + "void main (void) {\n" + " gl_Position = projMatrix*inPosition;\n" + " vertColor = inColor;\n" + "}\n"; + +// Passthrough FragmentShader +static const char *fragmentshader = "#version 150\n" + "in vec4 vertColor;\n" + "out vec4 outColor;\n" + "void main (void) {\n" + " outColor = vertColor;\n" + "}\n"; GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) { - GLint logLength, status; + GLint logLength, status; GLuint program = glCreateProgram(); GLuint vpShader; @@ -153,8 +162,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) glShaderSource(vpShader, 1, (const GLchar **)&vertexshader, NULL); glCompileShader(vpShader); glGetShaderiv(vpShader, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*) malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetShaderInfoLog(vpShader, logLength, &logLength, log); log_info("Vtx Shader compile log:\n%s", log); free(log); @@ -175,8 +185,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) glCompileShader(fpShader); glGetShaderiv(fpShader, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*)malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetShaderInfoLog(fpShader, logLength, &logLength, log); log_info("Frag Shader compile log:\n%s", log); free(log); @@ -192,8 +203,9 @@ GLuint createShaderProgram(GLint *posLoc, GLint *colLoc) glLinkProgram(program); glGetProgramiv(program, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*)malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetProgramInfoLog(program, logLength, &logLength, log); log_info("Program link log:\n%s", log); free(log); @@ -219,7 +231,7 @@ void destroyShaderProgram(GLuint program) glUseProgram(0); 
glGetAttachedShaders(program, 2, &count, shaders); int i; - for(i = 0; i < count; i++) + for (i = 0; i < count; i++) { glDetachShader(program, shaders[i]); glDeleteShader(shaders[i]); @@ -227,44 +239,49 @@ void destroyShaderProgram(GLuint program) glDeleteProgram(program); } -// This function queues up and runs the above CL kernel that writes the vertex data -cl_int run_cl_kernel( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, - cl_int rowIdx, cl_event fenceEvent, size_t numThreads ) +// This function queues up and runs the above CL kernel that writes the vertex +// data +cl_int run_cl_kernel(cl_kernel kernel, cl_command_queue queue, cl_mem stream0, + cl_mem stream1, cl_int rowIdx, cl_event fenceEvent, + size_t numThreads) { - cl_int error = clSetKernelArg( kernel, 3, sizeof( rowIdx ), &rowIdx ); - test_error( error, "Unable to set kernel arguments" ); + cl_int error = clSetKernelArg(kernel, 3, sizeof(rowIdx), &rowIdx); + test_error(error, "Unable to set kernel arguments"); clEventWrapper acqEvent1, acqEvent2, kernEvent, relEvent1, relEvent2; - int numEvents = ( fenceEvent != NULL ) ? 1 : 0; - cl_event *fence_evt = ( fenceEvent != NULL ) ? &fenceEvent : NULL; + int numEvents = (fenceEvent != NULL) ? 1 : 0; + cl_event *fence_evt = (fenceEvent != NULL) ? 
&fenceEvent : NULL; - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream0, numEvents, fence_evt, &acqEvent1 ); - test_error( error, "Unable to acquire GL obejcts"); - error = (*clEnqueueAcquireGLObjects_ptr)( queue, 1, &stream1, numEvents, fence_evt, &acqEvent2 ); - test_error( error, "Unable to acquire GL obejcts"); + error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream0, numEvents, + fence_evt, &acqEvent1); + test_error(error, "Unable to acquire GL obejcts"); + error = (*clEnqueueAcquireGLObjects_ptr)(queue, 1, &stream1, numEvents, + fence_evt, &acqEvent2); + test_error(error, "Unable to acquire GL obejcts"); - cl_event evts[ 2 ] = { acqEvent1, acqEvent2 }; + cl_event evts[2] = { acqEvent1, acqEvent2 }; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, &numThreads, NULL, 2, evts, &kernEvent ); - test_error( error, "Unable to execute test kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &numThreads, NULL, 2, + evts, &kernEvent); + test_error(error, "Unable to execute test kernel"); - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream0, 1, &kernEvent, &relEvent1 ); + error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream0, 1, &kernEvent, + &relEvent1); test_error(error, "clEnqueueReleaseGLObjects failed"); - error = (*clEnqueueReleaseGLObjects_ptr)( queue, 1, &stream1, 1, &kernEvent, &relEvent2 ); + error = (*clEnqueueReleaseGLObjects_ptr)(queue, 1, &stream1, 1, &kernEvent, + &relEvent2); test_error(error, "clEnqueueReleaseGLObjects failed"); - evts[ 0 ] = relEvent1; - evts[ 1 ] = relEvent2; - error = clWaitForEvents( 2, evts ); - test_error( error, "Unable to wait for release events" ); + evts[0] = relEvent1; + evts[1] = relEvent2; + error = clWaitForEvents(2, evts); + test_error(error, "Unable to wait for release events"); return 0; } -class RunThread : public genericThread -{ +class RunThread : public genericThread { public: - cl_kernel mKernel; cl_command_queue mQueue; cl_mem mStream0, mStream1; @@ -272,34 
+289,40 @@ public: cl_event mFenceEvent; size_t mNumThreads; - RunThread( cl_kernel kernel, cl_command_queue queue, cl_mem stream0, cl_mem stream1, size_t numThreads ) - : mKernel( kernel ), mQueue( queue ), mStream0( stream0 ), mStream1( stream1 ), mNumThreads( numThreads ) - { - } + RunThread(cl_kernel kernel, cl_command_queue queue, cl_mem stream0, + cl_mem stream1, size_t numThreads) + : mKernel(kernel), mQueue(queue), mStream0(stream0), mStream1(stream1), + mNumThreads(numThreads) + {} - void SetRunData( cl_int rowIdx, cl_event fenceEvent ) + void SetRunData(cl_int rowIdx, cl_event fenceEvent) { mRowIdx = rowIdx; mFenceEvent = fenceEvent; } - virtual void * IRun( void ) + virtual void *IRun(void) { - cl_int error = run_cl_kernel( mKernel, mQueue, mStream0, mStream1, mRowIdx, mFenceEvent, mNumThreads ); + cl_int error = run_cl_kernel(mKernel, mQueue, mStream0, mStream1, + mRowIdx, mFenceEvent, mNumThreads); return (void *)(uintptr_t)error; } }; -int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_queue queue, bool separateThreads, GLint rend_vs, GLint read_vs, cl_device_id rend_device ) +int test_fence_sync_single(cl_device_id device, cl_context context, + cl_command_queue queue, bool separateThreads, + GLint rend_vs, GLint read_vs, + cl_device_id rend_device) { int error; const int framebufferSize = 512; - if( !is_extension_available( device, "cl_khr_gl_event" ) ) + if (!is_extension_available(device, "cl_khr_gl_event")) { - log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); + log_info("NOTE: cl_khr_gl_event extension not present on this device; " + "skipping fence sync test\n"); return 0; } @@ -312,10 +335,11 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ clGetPlatformIDs(0, NULL, &nplatforms); clGetPlatformIDs(1, &platform, NULL); - if (nplatforms > 1) { + if (nplatforms > 1) + { log_info("clGetPlatformIDs returned multiple values. 
This is not " - "an error, but might result in obtaining incorrect function " - "pointers if you do not want the first returned platform.\n"); + "an error, but might result in obtaining incorrect function " + "pointers if you do not want the first returned platform.\n"); // Show them the platform name, in case it is a problem. @@ -323,28 +347,35 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ char *name; clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &size); - name = (char*)malloc(size); + name = (char *)malloc(size); clGetPlatformInfo(platform, CL_PLATFORM_NAME, size, name, NULL); log_info("Using platform with name: %s \n", name); free(name); } - clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncKHR"); - if( clCreateEventFromGLsyncKHR_ptr == NULL ) + clCreateEventFromGLsyncKHR_ptr = + (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform( + platform, "clCreateEventFromGLsyncKHR"); + if (clCreateEventFromGLsyncKHR_ptr == NULL) { - log_error( "ERROR: Unable to run fence_sync test (clCreateEventFromGLsyncKHR function not discovered!)\n" ); - clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn)clGetExtensionFunctionAddressForPlatform(platform, "clCreateEventFromGLsyncAPPLE"); + log_error("ERROR: Unable to run fence_sync test " + "(clCreateEventFromGLsyncKHR function not discovered!)\n"); + clCreateEventFromGLsyncKHR_ptr = (clCreateEventFromGLsyncKHR_fn) + clGetExtensionFunctionAddressForPlatform( + platform, "clCreateEventFromGLsyncAPPLE"); return -1; } #ifdef USING_ARB_sync - char *gl_version_str = (char*)glGetString( GL_VERSION ); + char *gl_version_str = (char *)glGetString(GL_VERSION); float glCoreVersion; sscanf(gl_version_str, "%f", &glCoreVersion); - if( glCoreVersion < 3.0f ) + if (glCoreVersion < 3.0f) { - log_info( "OpenGL version %f does not support fence/sync! 
Skipping test.\n", glCoreVersion ); + log_info( + "OpenGL version %f does not support fence/sync! Skipping test.\n", + glCoreVersion); return 0; } @@ -354,10 +385,13 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ GLint val, screen; CGLGetVirtualScreen(currCtx, &screen); CGLDescribePixelFormat(pixFmt, screen, kCGLPFAOpenGLProfile, &val); - if(val != kCGLOGLPVersion_3_2_Core) + if (val != kCGLOGLPVersion_3_2_Core) { - log_error( "OpenGL context was not created with OpenGL version >= 3.0 profile even though platform supports it" - "OpenGL profile %f does not support fence/sync! Skipping test.\n", glCoreVersion ); + log_error( + "OpenGL context was not created with OpenGL version >= 3.0 profile " + "even though platform supports it" + "OpenGL profile %f does not support fence/sync! Skipping test.\n", + glCoreVersion); return -1; } #else @@ -365,7 +399,7 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ HDC hdc = wglGetCurrentDC(); HGLRC hglrc = wglGetCurrentContext(); #else - Display* dpy = glXGetCurrentDisplay(); + Display *dpy = glXGetCurrentDisplay(); GLXDrawable drawable = glXGetCurrentDrawable(); GLXContext ctx = glXGetCurrentContext(); #endif @@ -386,51 +420,66 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ GLint posLoc, colLoc; GLuint shaderprogram = createShaderProgram(&posLoc, &colLoc); - if(!shaderprogram) + if (!shaderprogram) { log_error("Failed to create shader program\n"); return -1; } - float l = 0.0f; float r = framebufferSize; - float b = 0.0f; float t = framebufferSize; - - float projMatrix[16] = { 2.0f/(r-l), 0.0f, 0.0f, 0.0f, - 0.0f, 2.0f/(t-b), 0.0f, 0.0f, - 0.0f, 0.0f, -1.0f, 0.0f, - -(r+l)/(r-l), -(t+b)/(t-b), 0.0f, 1.0f - }; + float l = 0.0f; + float r = framebufferSize; + float b = 0.0f; + float t = framebufferSize; + + float projMatrix[16] = { 2.0f / (r - l), + 0.0f, + 0.0f, + 0.0f, + 0.0f, + 2.0f / (t - b), + 0.0f, + 0.0f, + 0.0f, + 
0.0f, + -1.0f, + 0.0f, + -(r + l) / (r - l), + -(t + b) / (t - b), + 0.0f, + 1.0f }; glUseProgram(shaderprogram); GLuint projMatLoc = glGetUniformLocation(shaderprogram, "projMatrix"); glUniformMatrix4fv(projMatLoc, 1, 0, projMatrix); glUseProgram(0); - // Note: the framebuffer is just the target to verify our results against, so we don't - // really care to go through all the possible formats in this case + // Note: the framebuffer is just the target to verify our results against, + // so we don't really care to go through all the possible formats in this + // case glFramebufferWrapper glFramebuffer; glRenderbufferWrapper glRenderbuffer; - error = CreateGLRenderbufferRaw( framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, - GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, - &glFramebuffer, &glRenderbuffer ); - if( error != 0 ) - return error; + error = CreateGLRenderbufferRaw( + framebufferSize, 128, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA, GL_RGBA, + GL_UNSIGNED_INT_8_8_8_8_REV, &glFramebuffer, &glRenderbuffer); + if (error != 0) return error; GLuint vao; glGenVertexArrays(1, &vao); glBindVertexArray(vao); glBufferWrapper vtxBuffer, colorBuffer; - glGenBuffers( 1, &vtxBuffer ); - glGenBuffers( 1, &colorBuffer ); + glGenBuffers(1, &vtxBuffer); + glGenBuffers(1, &colorBuffer); - const int numHorizVertices = ( framebufferSize * 64 ) + 1; + const int numHorizVertices = (framebufferSize * 64) + 1; - glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); - glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); + glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer); + glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4, + NULL, GL_STATIC_DRAW); - glBindBuffer( GL_ARRAY_BUFFER, colorBuffer ); - glBufferData( GL_ARRAY_BUFFER, sizeof( GLfloat ) * numHorizVertices * 2 * 4, NULL, GL_STATIC_DRAW ); + glBindBuffer(GL_ARRAY_BUFFER, colorBuffer); + glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * numHorizVertices * 2 * 4, + NULL, GL_STATIC_DRAW); 
// Now that the requisite objects are bound, we can attempt program // validation: @@ -439,8 +488,9 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ GLint logLength, status; glGetProgramiv(shaderprogram, GL_INFO_LOG_LENGTH, &logLength); - if (logLength > 0) { - GLchar *log = (GLchar*)malloc(logLength); + if (logLength > 0) + { + GLchar *log = (GLchar *)malloc(logLength); glGetProgramInfoLog(shaderprogram, logLength, &logLength, log); log_info("Program validate log:\n%s", log); free(log); @@ -455,125 +505,131 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper streams[ 2 ]; + clMemWrapper streams[2]; - if( create_single_kernel_helper( context, &program, &kernel, 1, updateBuffersKernel, "update" ) ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + updateBuffersKernel, "update")) return -1; - streams[ 0 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, vtxBuffer, &error ); - test_error( error, "Unable to create CL buffer from GL vertex buffer" ); + streams[0] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE, + vtxBuffer, &error); + test_error(error, "Unable to create CL buffer from GL vertex buffer"); - streams[ 1 ] = (*clCreateFromGLBuffer_ptr)( context, CL_MEM_READ_WRITE, colorBuffer, &error ); - test_error( error, "Unable to create CL buffer from GL color buffer" ); + streams[1] = (*clCreateFromGLBuffer_ptr)(context, CL_MEM_READ_WRITE, + colorBuffer, &error); + test_error(error, "Unable to create CL buffer from GL color buffer"); - error = clSetKernelArg( kernel, 0, sizeof( streams[ 0 ] ), &streams[ 0 ] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set kernel arguments"); - error = clSetKernelArg( kernel, 1, sizeof( streams[ 1 ] ), &streams[ 1 ] ); - test_error( error, "Unable to set 
kernel arguments" ); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set kernel arguments"); cl_int horizWrap = (cl_int)framebufferSize; - error = clSetKernelArg( kernel, 2, sizeof( horizWrap ), &horizWrap ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 2, sizeof(horizWrap), &horizWrap); + test_error(error, "Unable to set kernel arguments"); - glViewport( 0, 0, framebufferSize, framebufferSize ); - glClearColor( 0, 0, 0, 0 ); - glClear( GL_COLOR_BUFFER_BIT ); - glClear( GL_DEPTH_BUFFER_BIT ); - glDisable( GL_DEPTH_TEST ); - glEnable( GL_BLEND ); - glBlendFunc( GL_ONE, GL_ONE ); + glViewport(0, 0, framebufferSize, framebufferSize); + glClearColor(0, 0, 0, 0); + glClear(GL_COLOR_BUFFER_BIT); + glClear(GL_DEPTH_BUFFER_BIT); + glDisable(GL_DEPTH_TEST); + glEnable(GL_BLEND); + glBlendFunc(GL_ONE, GL_ONE); clEventWrapper fenceEvent; GLsync glFence = 0; // Do a loop through 8 different horizontal stripes against the framebuffer - RunThread thread( kernel, queue, streams[ 0 ], streams[ 1 ], (size_t)numHorizVertices ); + RunThread thread(kernel, queue, streams[0], streams[1], + (size_t)numHorizVertices); - for( int i = 0; i < 8; i++ ) + for (int i = 0; i < 8; i++) { // if current rendering device is not the compute device and // separateThreads == false which means compute is going on same // thread and we are using implicit synchronization (no GLSync obj used) - // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we need - // to wait for rendering to finish on other device before CL can start - // writing to CL/GL shared mem objects. When separateThreads is true i.e. - // we are using GLSync obj to synchronize then we dont need to call glFinish - // here since CL should wait for rendering on other device before this - // GLSync object to finish before it starts writing to shared mem object. 
- // Also rend_device == compute_device no need to call glFinish - if(rend_device != device && !separateThreads) - glFinish(); - - if( separateThreads ) + // then glFlush by clEnqueueAcquireGLObject is not sufficient ... we + // need to wait for rendering to finish on other device before CL can + // start writing to CL/GL shared mem objects. When separateThreads is + // true i.e. we are using GLSync obj to synchronize then we dont need to + // call glFinish here since CL should wait for rendering on other device + // before this GLSync object to finish before it starts writing to + // shared mem object. Also rend_device == compute_device no need to call + // glFinish + if (rend_device != device && !separateThreads) glFinish(); + + if (separateThreads) { - if (fenceEvent != NULL) - { - clReleaseEvent(fenceEvent); - glDeleteSyncFunc(glFence); - } + glDeleteSyncFunc(glFence); glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - fenceEvent = clCreateEventFromGLsyncKHR_ptr(context, glFence, &error); + fenceEvent = + clCreateEventFromGLsyncKHR_ptr(context, glFence, &error); test_error(error, "Unable to create CL event from GL fence"); - // in case of explicit synchronization, we just wait for the sync object to complete - // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility - // to flush on the context on which glSync is created + // in case of explicit synchronization, we just wait for the sync + // object to complete in clEnqueueAcquireGLObject but we dont flush. 
+ // Its application's responsibility to flush on the context on which + // glSync is created glFlush(); - thread.SetRunData( (cl_int)i, fenceEvent ); + thread.SetRunData((cl_int)i, fenceEvent); thread.Start(); error = (cl_int)(size_t)thread.Join(); } else { - error = run_cl_kernel( kernel, queue, streams[ 0 ], streams[ 1 ], (cl_int)i, fenceEvent, (size_t)numHorizVertices ); + error = + run_cl_kernel(kernel, queue, streams[0], streams[1], (cl_int)i, + fenceEvent, (size_t)numHorizVertices); } - test_error( error, "Unable to run CL kernel" ); + test_error(error, "Unable to run CL kernel"); glUseProgram(shaderprogram); glEnableVertexAttribArray(posLoc); glEnableVertexAttribArray(colLoc); - glBindBuffer( GL_ARRAY_BUFFER, vtxBuffer ); - glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); - glBindBuffer( GL_ARRAY_BUFFER, colorBuffer ); - glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, 4*sizeof(GLfloat), 0); - glBindBuffer( GL_ARRAY_BUFFER, 0 ); + glBindBuffer(GL_ARRAY_BUFFER, vtxBuffer); + glVertexAttribPointer(posLoc, 4, GL_FLOAT, GL_FALSE, + 4 * sizeof(GLfloat), 0); + glBindBuffer(GL_ARRAY_BUFFER, colorBuffer); + glVertexAttribPointer(colLoc, 4, GL_FLOAT, GL_FALSE, + 4 * sizeof(GLfloat), 0); + glBindBuffer(GL_ARRAY_BUFFER, 0); - glDrawArrays( GL_TRIANGLE_STRIP, 0, numHorizVertices * 2 ); + glDrawArrays(GL_TRIANGLE_STRIP, 0, numHorizVertices * 2); glDisableVertexAttribArray(posLoc); glDisableVertexAttribArray(colLoc); glUseProgram(0); - if( separateThreads ) + if (separateThreads) { - // If we're on the same thread, then we're testing implicit syncing, so we - // don't need the actual fence code - if( fenceEvent != NULL ) - { - clReleaseEvent( fenceEvent ); - glDeleteSyncFunc( glFence ); - } + // If we're on the same thread, then we're testing implicit syncing, + // so we don't need the actual fence code + glDeleteSyncFunc(glFence); + - glFence = glFenceSyncFunc( GL_SYNC_GPU_COMMANDS_COMPLETE, 0 ); - fenceEvent = 
clCreateEventFromGLsyncKHR_ptr( context, glFence, &error ); - test_error( error, "Unable to create CL event from GL fence" ); + glFence = glFenceSyncFunc(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + fenceEvent = + clCreateEventFromGLsyncKHR_ptr(context, glFence, &error); + test_error(error, "Unable to create CL event from GL fence"); - // in case of explicit synchronization, we just wait for the sync object to complete - // in clEnqueueAcquireGLObject but we dont flush. Its application's responsibility - // to flush on the context on which glSync is created + // in case of explicit synchronization, we just wait for the sync + // object to complete in clEnqueueAcquireGLObject but we dont flush. + // Its application's responsibility to flush on the context on which + // glSync is created glFlush(); } else glFinish(); } - if( glFence != 0 ) - // Don't need the final release for fenceEvent, because the wrapper will take care of that - glDeleteSyncFunc( glFence ); + if (glFence != 0) + // Don't need the final release for fenceEvent, because the wrapper will + // take care of that + glDeleteSyncFunc(glFence); #ifdef __APPLE__ CGLSetVirtualScreen(CGLGetCurrentContext(), read_vs); @@ -585,54 +641,62 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ #endif #endif // Grab the contents of the final framebuffer - BufferOwningPtr resultData( ReadGLRenderbuffer( glFramebuffer, glRenderbuffer, - GL_COLOR_ATTACHMENT0_EXT, - GL_RGBA, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, - framebufferSize, 128 ) ); - - // Check the contents now. We should end up with solid color bands 32 pixels high and the - // full width of the framebuffer, at values (128,128,128) due to the additive blending - for( int i = 0; i < 8; i++ ) + BufferOwningPtr resultData(ReadGLRenderbuffer( + glFramebuffer, glRenderbuffer, GL_COLOR_ATTACHMENT0_EXT, GL_RGBA, + GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar, framebufferSize, 128)); + + // Check the contents now. 
We should end up with solid color bands 32 pixels + // high and the full width of the framebuffer, at values (128,128,128) due + // to the additive blending + for (int i = 0; i < 8; i++) { - for( int y = 0; y < 4; y++ ) + for (int y = 0; y < 4; y++) { - // Note: coverage will be double because the 63-0 triangle overwrites again at the end of the pass - cl_uchar valA = ( ( ( i + 1 ) & 1 ) ) * numHorizVertices * 2 / framebufferSize; - cl_uchar valB = ( ( ( i + 1 ) & 2 ) >> 1 ) * numHorizVertices * 2 / framebufferSize; - cl_uchar valC = ( ( ( i + 1 ) & 4 ) >> 2 ) * numHorizVertices * 2 / framebufferSize; - - cl_uchar *row = (cl_uchar *)&resultData[ ( i * 16 + y ) * framebufferSize * 4 ]; - for( int x = 0; x < ( framebufferSize - 1 ) - 1; x++ ) + // Note: coverage will be double because the 63-0 triangle + // overwrites again at the end of the pass + cl_uchar valA = + (((i + 1) & 1)) * numHorizVertices * 2 / framebufferSize; + cl_uchar valB = + (((i + 1) & 2) >> 1) * numHorizVertices * 2 / framebufferSize; + cl_uchar valC = + (((i + 1) & 4) >> 2) * numHorizVertices * 2 / framebufferSize; + + cl_uchar *row = + (cl_uchar *)&resultData[(i * 16 + y) * framebufferSize * 4]; + for (int x = 0; x < (framebufferSize - 1) - 1; x++) { - if( ( row[ x * 4 ] != valA ) || ( row[ x * 4 + 1 ] != valB ) || - ( row[ x * 4 + 2 ] != valC ) ) + if ((row[x * 4] != valA) || (row[x * 4 + 1] != valB) + || (row[x * 4 + 2] != valC)) { - log_error( "ERROR: Output framebuffer did not validate!\n" ); - DumpGLBuffer( GL_UNSIGNED_BYTE, framebufferSize, 128, resultData ); - log_error( "RUNS:\n" ); + log_error("ERROR: Output framebuffer did not validate!\n"); + DumpGLBuffer(GL_UNSIGNED_BYTE, framebufferSize, 128, + resultData); + log_error("RUNS:\n"); uint32_t *p = (uint32_t *)(char *)resultData; size_t a = 0; - for( size_t t = 1; t < framebufferSize * framebufferSize; t++ ) + for (size_t t = 1; t < framebufferSize * framebufferSize; + t++) { - if( p[ a ] != 0 ) + if (p[a] != 0) { - if( p[ t ] == 0 ) + 
if (p[t] == 0) { - log_error( "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", a, t - 1, - (int)( a % framebufferSize ), (int)( a / framebufferSize ), - (int)( ( t - 1 ) % framebufferSize ), (int)( ( t - 1 ) / framebufferSize ), - p[ a ] ); + log_error( + "RUN: %ld to %ld (%d,%d to %d,%d) 0x%08x\n", + a, t - 1, (int)(a % framebufferSize), + (int)(a / framebufferSize), + (int)((t - 1) % framebufferSize), + (int)((t - 1) / framebufferSize), p[a]); a = t; } } else { - if( p[ t ] != 0 ) + if (p[t] != 0) { a = t; } } - } return -1; } @@ -645,46 +709,56 @@ int test_fence_sync_single( cl_device_id device, cl_context context, cl_command_ return 0; } -int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue queue, int numElements ) +int test_fence_sync(cl_device_id device, cl_context context, + cl_command_queue queue, int numElements) { GLint vs_count = 0; cl_device_id *device_list = NULL; - if( !is_extension_available( device, "cl_khr_gl_event" ) ) + if (!is_extension_available(device, "cl_khr_gl_event")) { - log_info( "NOTE: cl_khr_gl_event extension not present on this device; skipping fence sync test\n" ); + log_info("NOTE: cl_khr_gl_event extension not present on this device; " + "skipping fence sync test\n"); return 0; } #ifdef __APPLE__ CGLContextObj ctx = CGLGetCurrentContext(); CGLPixelFormatObj pix = CGLGetPixelFormat(ctx); - CGLError err = CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count); + CGLError err = + CGLDescribePixelFormat(pix, 0, kCGLPFAVirtualScreenCount, &vs_count); - device_list = (cl_device_id *) malloc(sizeof(cl_device_id)*vs_count); - clGetGLContextInfoAPPLE(context, ctx, CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, sizeof(cl_device_id)*vs_count, device_list, NULL); + device_list = (cl_device_id *)malloc(sizeof(cl_device_id) * vs_count); + clGetGLContextInfoAPPLE(context, ctx, + CL_CGL_DEVICES_FOR_SUPPORTED_VIRTUAL_SCREENS_APPLE, + sizeof(cl_device_id) * vs_count, device_list, NULL); #else - // Need 
platform specific way of getting devices from CL context to which OpenGL can render - // If not available it can be replaced with clGetContextInfo with CL_CONTEXT_DEVICES + // Need platform specific way of getting devices from CL context to which + // OpenGL can render If not available it can be replaced with + // clGetContextInfo with CL_CONTEXT_DEVICES size_t device_cb; - cl_int err = clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb); - if( err != CL_SUCCESS ) + cl_int err = + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &device_cb); + if (err != CL_SUCCESS) { - print_error( err, "Unable to get device count from context" ); - return -1; + print_error(err, "Unable to get device count from context"); + return -1; } vs_count = (GLint)device_cb / sizeof(cl_device_id); - if (vs_count < 1) { - log_error("No devices found.\n"); - return -1; + if (vs_count < 1) + { + log_error("No devices found.\n"); + return -1; } - device_list = (cl_device_id *) malloc(device_cb); - err = clGetContextInfo( context, CL_CONTEXT_DEVICES, device_cb, device_list, NULL); - if( err != CL_SUCCESS ) { - free(device_list); - print_error( err, "Unable to get device list from context" ); - return -1; + device_list = (cl_device_id *)malloc(device_cb); + err = clGetContextInfo(context, CL_CONTEXT_DEVICES, device_cb, device_list, + NULL); + if (err != CL_SUCCESS) + { + free(device_list); + print_error(err, "Unable to get device list from context"); + return -1; } #endif @@ -695,30 +769,38 @@ int test_fence_sync( cl_device_id device, cl_context context, cl_command_queue q // Loop through all the devices capable to OpenGL rendering // and set them as current rendering target - for(rend_vs = 0; rend_vs < vs_count; rend_vs++) + for (rend_vs = 0; rend_vs < vs_count; rend_vs++) { // Loop through all the devices and set them as current // compute target - for(read_vs = 0; read_vs < vs_count; read_vs++) + for (read_vs = 0; read_vs < vs_count; read_vs++) { - cl_device_id 
rend_device = device_list[rend_vs], read_device = device_list[read_vs]; + cl_device_id rend_device = device_list[rend_vs], + read_device = device_list[read_vs]; char rend_name[200], read_name[200]; - clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), rend_name, NULL); - clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), read_name, NULL); + clGetDeviceInfo(rend_device, CL_DEVICE_NAME, sizeof(rend_name), + rend_name, NULL); + clGetDeviceInfo(read_device, CL_DEVICE_NAME, sizeof(read_name), + read_name, NULL); - log_info("Rendering on: %s, read back on: %s\n", rend_name, read_name); - error = test_fence_sync_single( device, context, queue, false, rend_vs, read_vs, rend_device ); + log_info("Rendering on: %s, read back on: %s\n", rend_name, + read_name); + error = test_fence_sync_single(device, context, queue, false, + rend_vs, read_vs, rend_device); any_failed |= error; - if( error != 0 ) - log_error( "ERROR: Implicit syncing with GL sync events failed!\n\n" ); + if (error != 0) + log_error( + "ERROR: Implicit syncing with GL sync events failed!\n\n"); else log_info("Implicit syncing Passed\n"); - error = test_fence_sync_single( device, context, queue, true, rend_vs, read_vs, rend_device ); + error = test_fence_sync_single(device, context, queue, true, + rend_vs, read_vs, rend_device); any_failed |= error; - if( error != 0 ) - log_error( "ERROR: Explicit syncing with GL sync events failed!\n\n" ); + if (error != 0) + log_error( + "ERROR: Explicit syncing with GL sync events failed!\n\n"); else log_info("Explicit syncing Passed\n"); } -- cgit v1.2.3 From 79f692d8e59f37236c179ebbca086231d5f5c9bc Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Wed, 21 Jul 2021 01:51:29 -0600 Subject: subgroups: Fix setting cl_halfs and progress check. (#1278) * subgroups: Fix setting cl_halfs and progress check. cl_float testing uses set_value such that a generated cl_ulong of 1 is stored as 1.0F in a logical sense. 
However, cl_half values aren't intrinsic to C++ and generated cl_ulongs less than 1024 in particular are interpreted bitwise as subnormals. The test fails on compute devices lacking subnormal support. Perform the logical conversion to cl_half. Fix independent forward progress check. * subgroups_half: Address review comments * subgroups_half: Formatting fixes required by check-format * subgroups_half: Modified to query and use rounding mode supported by device Co-authored-by: spauls --- test_conformance/subgroups/main.cpp | 18 +++++++++++++++ .../subgroups/subgroup_common_templates.h | 2 +- test_conformance/subgroups/subhelpers.h | 3 ++- test_conformance/subgroups/test_ifp.cpp | 26 +++++++++++++--------- 4 files changed, 36 insertions(+), 13 deletions(-) diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp index 44416dd7..ebe94558 100644 --- a/test_conformance/subgroups/main.cpp +++ b/test_conformance/subgroups/main.cpp @@ -19,8 +19,10 @@ #include #include "procs.h" #include "harness/testHarness.h" +#include "CL/cl_half.h" MTdata gMTdata; +cl_half_rounding_mode g_rounding_mode; test_definition test_list[] = { ADD_TEST_VERSION(sub_group_info_ext, Version(2, 0)), @@ -66,6 +68,22 @@ static test_status InitCL(cl_device_id device) ret = TEST_SKIP; } } + // Determine the rounding mode to be used in float to half conversions in + // init and reference code + const cl_device_fp_config fpConfig = get_default_rounding_mode(device); + + if (fpConfig == CL_FP_ROUND_TO_NEAREST) + { + g_rounding_mode = CL_HALF_RTE; + } + else if (fpConfig == CL_FP_ROUND_TO_ZERO && gIsEmbedded) + { + g_rounding_mode = CL_HALF_RTZ; + } + else + { + assert(false && "Unreachable"); + } return ret; } diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index b30c416b..4333e95b 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ 
b/test_conformance/subgroups/subgroup_common_templates.h @@ -301,7 +301,7 @@ static float to_float(subgroups::cl_half x) { return cl_half_to_float(x.data); } static subgroups::cl_half to_half(float x) { subgroups::cl_half value; - value.data = cl_half_from_float(x, CL_HALF_RTE); + value.data = cl_half_from_float(x, g_rounding_mode); return value; } diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 93673b35..9232cded 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -28,6 +28,7 @@ #define NR_OF_ACTIVE_WORK_ITEMS 4 extern MTdata gMTdata; +extern cl_half_rounding_mode g_rounding_mode; struct WorkGroupParams { @@ -1080,7 +1081,7 @@ template typename std::enable_if::is_sb_scalar_type::value>::type set_value(Ty &lhs, const cl_ulong &rhs) { - lhs.data = rhs; + lhs.data = cl_half_from_float(static_cast(rhs), g_rounding_mode); } // compare for common vectors diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp index 428f2cdc..fccaa8c7 100644 --- a/test_conformance/subgroups/test_ifp.cpp +++ b/test_conformance/subgroups/test_ifp.cpp @@ -360,17 +360,21 @@ int test_ifp_ext(cl_device_id device, cl_context context, } // ifp only in subgroup functions tests: test_status error; - error = checkIFPSupport(device, ifpSupport); - if (error != TEST_PASS) + auto device_cl_version = get_device_cl_version(device); + if (device_cl_version >= Version(2, 1)) { - return error; - } - if (ifpSupport == false) - { - log_info( - "Error reason: the extension cl_khr_subgroups requires that " - "Independed forward progress has to be supported by device.\n"); - return TEST_FAIL; + error = checkIFPSupport(device, ifpSupport); + if (error != TEST_PASS) + { + return error; + } + if (ifpSupport == false) + { + log_info( + "Error reason: the extension cl_khr_subgroups requires that " + "Independed forward progress has to be supported by device.\n"); + return 
TEST_FAIL; + } } return test_ifp(device, context, queue, num_elements, false); -} \ No newline at end of file +} -- cgit v1.2.3 From cc0b46e4570d936c38795a20c11315f13fa25c85 Mon Sep 17 00:00:00 2001 From: kalchr01 <83217667+kalchr01@users.noreply.github.com> Date: Mon, 9 Aug 2021 11:20:40 +0100 Subject: Add tests for entrypoint cl_khr_suggested_local_work_size (#1264) * Add tests for entrypoint cl_khr_suggested_local_work_size Tests added within test_conformance/workgroups. The tests cover several shapes (num dimensions) and sizes of global work size, kernels using local memory (dynamic and static) and present/non-present global work offset. Signed-off-by: Kallia Chronaki * Fix in comparison for error checking Signed-off-by: Kallia Chronaki * 'test_wg_suggested_local_work_size' fixes * Refactoring of 'test_wg_suggested_local_work_size' Modifications to reduce code duplication and minimize build time --- test_conformance/workgroups/CMakeLists.txt | 1 + test_conformance/workgroups/main.cpp | 33 +- test_conformance/workgroups/procs.h | 18 +- .../test_wg_suggested_local_work_size.cpp | 611 +++++++++++++++++++++ 4 files changed, 646 insertions(+), 17 deletions(-) create mode 100644 test_conformance/workgroups/test_wg_suggested_local_work_size.cpp diff --git a/test_conformance/workgroups/CMakeLists.txt b/test_conformance/workgroups/CMakeLists.txt index 08886086..c90bef88 100644 --- a/test_conformance/workgroups/CMakeLists.txt +++ b/test_conformance/workgroups/CMakeLists.txt @@ -14,6 +14,7 @@ set(${MODULE_NAME}_SOURCES test_wg_scan_inclusive_add.cpp test_wg_scan_inclusive_min.cpp test_wg_scan_inclusive_max.cpp + test_wg_suggested_local_work_size.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/workgroups/main.cpp b/test_conformance/workgroups/main.cpp index 41ffa741..abb1145b 100644 --- a/test_conformance/workgroups/main.cpp +++ b/test_conformance/workgroups/main.cpp @@ -24,27 +24,30 @@ #endif test_definition test_list[] = { - ADD_TEST(work_group_all), - 
ADD_TEST(work_group_any), - ADD_TEST(work_group_reduce_add), - ADD_TEST(work_group_reduce_min), - ADD_TEST(work_group_reduce_max), - ADD_TEST(work_group_scan_inclusive_add), - ADD_TEST(work_group_scan_inclusive_min), - ADD_TEST(work_group_scan_inclusive_max), - ADD_TEST(work_group_scan_exclusive_add), - ADD_TEST(work_group_scan_exclusive_min), - ADD_TEST(work_group_scan_exclusive_max), - ADD_TEST(work_group_broadcast_1D), - ADD_TEST(work_group_broadcast_2D), - ADD_TEST(work_group_broadcast_3D), + ADD_TEST_VERSION(work_group_all, Version(2, 0)), + ADD_TEST_VERSION(work_group_any, Version(2, 0)), + ADD_TEST_VERSION(work_group_reduce_add, Version(2, 0)), + ADD_TEST_VERSION(work_group_reduce_min, Version(2, 0)), + ADD_TEST_VERSION(work_group_reduce_max, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_inclusive_add, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_inclusive_min, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_inclusive_max, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_exclusive_add, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_exclusive_min, Version(2, 0)), + ADD_TEST_VERSION(work_group_scan_exclusive_max, Version(2, 0)), + ADD_TEST_VERSION(work_group_broadcast_1D, Version(2, 0)), + ADD_TEST_VERSION(work_group_broadcast_2D, Version(2, 0)), + ADD_TEST_VERSION(work_group_broadcast_3D, Version(2, 0)), + ADD_TEST(work_group_suggested_local_size_1D), + ADD_TEST(work_group_suggested_local_size_2D), + ADD_TEST(work_group_suggested_local_size_3D) }; const int test_num = ARRAY_SIZE(test_list); test_status InitCL(cl_device_id device) { auto version = get_device_cl_version(device); - auto expected_min_version = Version(2, 0); + auto expected_min_version = Version(1, 2); if (version < expected_min_version) { version_expected_info("Test", "OpenCL", diff --git a/test_conformance/workgroups/procs.h b/test_conformance/workgroups/procs.h index 2e6e79e2..6143d525 100644 --- a/test_conformance/workgroups/procs.h +++ 
b/test_conformance/workgroups/procs.h @@ -1,6 +1,6 @@ // -// Copyright (c) 2017 The Khronos Group Inc. -// +// Copyright (c) 2017, 2021 The Khronos Group Inc. +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -16,6 +16,7 @@ #include "harness/testHarness.h" #include "harness/kernelHelpers.h" #include "harness/errorHelpers.h" +#include "harness/typeWrappers.h" #include "harness/conversions.h" #include "harness/mt19937.h" @@ -36,3 +37,16 @@ extern int test_work_group_scan_exclusive_max(cl_device_id deviceID, cl_context extern int test_work_group_scan_inclusive_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_work_group_scan_inclusive_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); extern int test_work_group_scan_inclusive_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); + +extern int test_work_group_suggested_local_size_1D(cl_device_id device, + cl_context context, + cl_command_queue queue, + int n_elems); +extern int test_work_group_suggested_local_size_2D(cl_device_id device, + cl_context context, + cl_command_queue queue, + int n_elems); +extern int test_work_group_suggested_local_size_3D(cl_device_id device, + cl_context context, + cl_command_queue queue, + int n_elems); diff --git a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp new file mode 100644 index 00000000..1dc1b39c --- /dev/null +++ b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp @@ -0,0 +1,611 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "harness/compat.h" + +#include +#include +#include +#include +#include + +#include "procs.h" +#include + +/** @brief Gets the number of elements of type s in a fixed length array of s */ +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) +#define test_error_ret_and_free(errCode, msg, retValue, ptr) \ + { \ + auto errCodeResult = errCode; \ + if (errCodeResult != CL_SUCCESS) \ + { \ + print_error(errCodeResult, msg); \ + free(ptr); \ + return retValue; \ + } \ + } + +const char* wg_scan_local_work_group_size = R"( + bool is_zero_linear_id() + { + size_t linear_id; +#if __OPENCL_VERSION__ < CL_VERSION_2_0 + linear_id = ((get_global_id(2) – get_global_offset(2)) * get_global_size(1) * get_global_size(0)) + + ((get_global_id(1) – get_global_offset(1)) * get_global_size(0)) + + (get_global_id(0) – get_global_offset(0)); +#else + linear_id = get_global_linear_id(); +#endif + return linear_id == 0; + } + + uint get_l_size(size_t dim) + { +#if __OPENCL_VERSION__ < CL_VERSION_2_0 + return get_local_size(dim); +#else + return get_enqueued_local_size(dim); +#endif + } + + __kernel void test_wg_scan_local_work_group_size(global uint *output) + { + if(!is_zero_linear_id()) return; + for (uint i = 0; i < 3; i++) + { + output[i] = get_l_size(i); + } + } + __kernel void test_wg_scan_local_work_group_size_static_local( + global uint *output) + { + __local char c[LOCAL_MEM_SIZE]; + + if(!is_zero_linear_id()) return; + for (uint i = 0; i < 3; i++) + { + output[i] = get_l_size(i); + } + } + __kernel void test_wg_scan_local_work_group_size_dynlocal( + global 
uint *output, + __local char * c) + { + if(!is_zero_linear_id()) return; + for (uint i = 0; i < 3; i++) + { + output[i] = get_l_size(i); + } + };)"; + +bool is_prime(size_t a) +{ + size_t c; + + for (c = 2; c < a; c++) + { + if (a % c == 0) return false; + } + return true; +} + +bool is_not_prime(size_t a) { return !is_prime(a); } + +bool is_not_even(size_t a) { return (is_prime(a) || (a % 2 == 1)); } + +bool is_not_odd(size_t a) { return (is_prime(a) || (a % 2 == 0)); } + +#define NELEMS(s) (sizeof(s) / sizeof((s)[0])) +/* The numbers we chose in the value_range are to be used for the second and + third dimension of the global work group size. The numbers below cover many + different cases: 1024 is a power of 2, 3 is an odd and small prime number, 12 + is a multiple of 4 but not a power of 2, 1031 is a large odd and prime number + and 1 is to test the lack of this dimension if the others are present */ +const size_t value_range[] = { 1024, 3, 12, 1031, 1 }; +/* The value_range_nD contains numbers to be used for the experiments with 2D + and 3D global work sizes. This is because we need smaller numbers so that the + resulting number of work items is meaningful and does not become too large. 
+ The cases here are: 64 that is a power of 2, 3 is an odd and small prime + number, 12 is a multiple of 4 but not a power of 2, 113 is a large prime + number + and 1 is to test the lack of this dimension if the others are present */ +const size_t value_range_nD[] = { 64, 3, 12, 113, 1 }; +const size_t basic_increment = 16; +const size_t primes_increment = 1; +enum num_dims +{ + _1D = 1, + _2D = 2, + _3D = 3 +}; + +int do_test(cl_device_id device, cl_context context, cl_command_queue queue, + cl_kernel scan_kernel, int work_dim, size_t global_work_offset[3], + size_t test_values[3], size_t dyn_mem_size) +{ + size_t local_work_size[] = { 1, 1, 1 }; + size_t suggested_total_size; + size_t workgroupinfo_size; + cl_uint kernel_work_size[3] = { 0 }; + clMemWrapper buffer; + cl_platform_id platform; + + int err = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), + &platform, NULL); + test_error_ret(err, "clGetDeviceInfo failed", -1); + clGetKernelSuggestedLocalWorkSizeKHR_fn + clGetKernelSuggestedLocalWorkSizeKHR = + (clGetKernelSuggestedLocalWorkSizeKHR_fn) + clGetExtensionFunctionAddressForPlatform( + platform, "clGetKernelSuggestedLocalWorkSizeKHR"); + + if (clGetKernelSuggestedLocalWorkSizeKHR == NULL) + { + log_info("Extension 'cl_khr_suggested_local_work_size' could not be " + "found.\n"); + return TEST_FAIL; + } + + /* Create the actual buffer, using local_buffer as the host pointer, and ask + * to copy that into the buffer */ + buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(kernel_work_size), NULL, &err); + test_error_ret(err, "clCreateBuffer failed", -1); + err = clSetKernelArg(scan_kernel, 0, sizeof(buffer), &buffer); + test_error_ret(err, "clSetKernelArg failed", -1); + if (dyn_mem_size) + { + err = clSetKernelArg(scan_kernel, 1, dyn_mem_size, NULL); + test_error_ret(err, "clSetKernelArg failed", -1); + } + err = clGetKernelSuggestedLocalWorkSizeKHR(queue, scan_kernel, work_dim, + global_work_offset, test_values, + local_work_size); + 
test_error_ret(err, "clGetKernelSuggestedLocalWorkSizeKHR failed", -1); + suggested_total_size = + local_work_size[0] * local_work_size[1] * local_work_size[2]; + err = clGetKernelWorkGroupInfo( + scan_kernel, device, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(workgroupinfo_size), &workgroupinfo_size, NULL); + test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1); + if (suggested_total_size > workgroupinfo_size) + { + std::cout << "The suggested work group size consist of " + << suggested_total_size << " work items.\n" + << "Work items are limited by " << workgroupinfo_size + << std::endl; + std::cout << "Size from clGetKernelWorkGroupInfo: " + << workgroupinfo_size; + std::cout << "\nSize from clGetKernelSuggestedLocalWorkSizeKHR: " + << local_work_size[0] * local_work_size[1] + * local_work_size[2] + << std::endl; + return -1; + } + + err = + clEnqueueNDRangeKernel(queue, scan_kernel, work_dim, global_work_offset, + test_values, // global work size + NULL, 0, NULL, NULL); + test_error_ret(err, "clEnqueueNDRangeKernel failed", -1); + err = clEnqueueReadBuffer(queue, buffer, CL_NON_BLOCKING, 0, + sizeof(kernel_work_size), kernel_work_size, 0, + NULL, NULL); + test_error_ret(err, "clEnqueueReadBuffer failed", -1); + err = clFinish(queue); + test_error_ret(err, "clFinish failed", -1); + + if (kernel_work_size[0] != local_work_size[0] + || kernel_work_size[1] != local_work_size[1] + || kernel_work_size[2] != local_work_size[2]) + { + std::cout + << "Kernel work size differs from local work size suggested:\n" + << "Kernel work size: (" << kernel_work_size[0] << ", " + << kernel_work_size[1] << ", " << kernel_work_size[2] << ")" + << "Local work size: (" << local_work_size[0] << ", " + << local_work_size[1] << ", " << local_work_size[2] << ")\n"; + return -1; + } + return err; +} + +int do_test_work_group_suggested_local_size( + cl_device_id device, cl_context context, cl_command_queue queue, + bool (*skip_cond)(size_t), size_t start, size_t end, size_t incr, + cl_long 
max_local_mem_size, size_t global_work_offset[], num_dims dim) +{ + clProgramWrapper scan_program; + clKernelWrapper scan_kernel; + int err; + size_t test_values[] = { 1, 1, 1 }; + std::string kernel_names[6] = { + "test_wg_scan_local_work_group_size", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_static_local", + "test_wg_scan_local_work_group_size_dynlocal" + }; + std::string str_local_mem_size[6] = { + "-DLOCAL_MEM_SIZE=1", "-DLOCAL_MEM_SIZE=1024", + "-DLOCAL_MEM_SIZE=4096", "-DLOCAL_MEM_SIZE=16384", + "-DLOCAL_MEM_SIZE=32768", "-DLOCAL_MEM_SIZE=1" + }; + size_t local_mem_size[6] = { 1, 1024, 4096, 16384, 32768, 1 }; + size_t dyn_mem_size[6] = { 0, 0, 0, 0, 0, 1024 }; + cl_ulong kernel_local_mem_size; + for (int kernel_num = 0; kernel_num < 6; kernel_num++) + { + if (max_local_mem_size < local_mem_size[kernel_num]) continue; + // Create the kernel + err = create_single_kernel_helper( + context, &scan_program, &scan_kernel, 1, + &wg_scan_local_work_group_size, (kernel_names[kernel_num]).c_str(), + (str_local_mem_size[kernel_num]).c_str()); + test_error_ret(err, + ("create_single_kernel_helper failed for kernel " + + kernel_names[kernel_num]) + .c_str(), + -1); + + // Check if the local memory used by the kernel is going to exceed the + // max_local_mem_size + err = clGetKernelWorkGroupInfo( + scan_kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(kernel_local_mem_size), &kernel_local_mem_size, NULL); + test_error_ret(err, "clGetKernelWorkGroupInfo failed", -1); + if (kernel_local_mem_size > max_local_mem_size) continue; + // return error if no number is found due to the skip condition + err = -1; + unsigned int j = 0; + size_t num_elems = NELEMS(value_range); + for (size_t i = start; i < end; i += incr) + { + if (skip_cond(i)) continue; + err = 0; + test_values[0] = i; + if (dim == _2D) test_values[1] = 
value_range_nD[j++ % num_elems]; + if (dim == _3D) + { + test_values[1] = value_range_nD[j++ % num_elems]; + test_values[2] = value_range_nD[rand() % num_elems]; + } + err |= do_test(device, context, queue, scan_kernel, dim, + global_work_offset, test_values, + dyn_mem_size[kernel_num]); + test_error_ret( + err, + ("do_test failed for kernel " + kernel_names[kernel_num]) + .c_str(), + -1); + } + } + return err; +} + +int test_work_group_suggested_local_size_1D(cl_device_id device, + cl_context context, + cl_command_queue queue, int n_elems) +{ + if (!is_extension_available(device, "cl_khr_suggested_local_work_size")) + { + log_info("Device does not support 'cl_khr_suggested_local_work_size'. " + "Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + cl_long max_local_mem_size; + cl_int err = + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(max_local_mem_size), &max_local_mem_size, NULL); + test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.", + -1); + + size_t start, end, incr; + size_t global_work_offset[] = { 0, 0, 0 }; + size_t max_work_items = 0; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_items), &max_work_items, NULL); + + // odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret( + err, "test_work_group_suggested_local_size_1D for odds failed.", -1); + log_info("test_work_group_suggested_local_size_1D odds passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret( + err, "test_work_group_suggested_local_size_1D for evens failed.", -1); + log_info("test_work_group_suggested_local_size_1D evens passed\n"); + + // 
primes + start = max_work_items + 1; + end = 2 * max_work_items; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret( + err, "test_work_group_suggested_local_size_1D for primes failed.", -1); + log_info("test_work_group_suggested_local_size_1D primes passed\n"); + + global_work_offset[0] = 10; + global_work_offset[1] = 10; + global_work_offset[2] = 10; + // odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret(err, + "test_work_group_suggested_local_size_1D for odds with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_1D odds with " + "global_work_offset passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret(err, + "test_work_group_suggested_local_size_1D for evens with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_1D evens with " + "global_work_offset passed\n"); + + // primes + start = max_work_items + 1; + end = 2 * max_work_items; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _1D); + test_error_ret(err, + "test_work_group_suggested_local_size_1D for primes with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_1D primes with " + "global_work_offset passed\n"); + + return err; +} + +int test_work_group_suggested_local_size_2D(cl_device_id device, + cl_context context, + cl_command_queue 
queue, int n_elems) +{ + if (!is_extension_available(device, "cl_khr_suggested_local_work_size")) + { + log_info("Device does not support 'cl_khr_suggested_local_work_size'. " + "Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + cl_long max_local_mem_size; + cl_int err = + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(max_local_mem_size), &max_local_mem_size, NULL); + test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.", + -1); + + size_t start, end, incr; + size_t global_work_offset[] = { 0, 0, 0 }; + size_t max_work_items = 0; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_items), &max_work_items, NULL); + + // odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret( + err, "test_work_group_suggested_local_size_2D for odds failed.", -1); + log_info("test_work_group_suggested_local_size_2D odds passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret( + err, "test_work_group_suggested_local_size_2D for evens failed.", -1); + log_info("test_work_group_suggested_local_size_2D evens passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret( + err, "test_work_group_suggested_local_size_2D for primes failed.", -1); + log_info("test_work_group_suggested_local_size_2D primes passed\n"); + + global_work_offset[0] = 10; + global_work_offset[1] = 10; + global_work_offset[2] = 10; + + // 
odds + start = 1; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret(err, + "test_work_group_suggested_local_size_2D for odds with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_2D odds with " + "global_work_offset passed\n"); + + // evens + start = 2; + end = max_work_items; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret(err, + "test_work_group_suggested_local_size_2D for evens with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_2D evens with " + "global_work_offset passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _2D); + test_error_ret(err, + "test_work_group_suggested_local_size_2D for primes with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_2D primes with " + "global_work_offset passed\n"); + + return err; +} + +int test_work_group_suggested_local_size_3D(cl_device_id device, + cl_context context, + cl_command_queue queue, int n_elems) +{ + if (!is_extension_available(device, "cl_khr_suggested_local_work_size")) + { + log_info("Device does not support 'cl_khr_suggested_local_work_size'. 
" + "Skipping the test.\n"); + return TEST_SKIPPED_ITSELF; + } + cl_long max_local_mem_size; + cl_int err = + clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(max_local_mem_size), &max_local_mem_size, NULL); + test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.", + -1); + + size_t start, end, incr; + size_t global_work_offset[] = { 0, 0, 0 }; + size_t max_work_items = 0; + clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_work_items), &max_work_items, NULL); + + // odds + start = 1; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret( + err, "test_work_group_suggested_local_size_3D for odds failed.", -1); + log_info("test_work_group_suggested_local_size_3D odds passed\n"); + + // evens + start = 2; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret( + err, "test_work_group_suggested_local_size_3D for evens failed.", -1); + log_info("test_work_group_suggested_local_size_3D evens passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret( + err, "test_work_group_suggested_local_size_3D for primes failed.", -1); + log_info("test_work_group_suggested_local_size_3D primes passed\n"); + + global_work_offset[0] = 10; + global_work_offset[1] = 10; + global_work_offset[2] = 10; + + // odds + start = 1; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_odd, start, 
end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret(err, + "test_work_group_suggested_local_size_3D for odds with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_3D odds with " + "global_work_offset passed\n"); + + // evens + start = 2; + end = max_work_items / 2; + incr = basic_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_even, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret(err, + "test_work_group_suggested_local_size_3D for evens with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_3D evens with " + "global_work_offset passed\n"); + + // primes + start = max_work_items + 1; + end = max_work_items + max_work_items / 4; + incr = primes_increment; + err = do_test_work_group_suggested_local_size( + device, context, queue, is_not_prime, start, end, incr, + max_local_mem_size, global_work_offset, _3D); + test_error_ret(err, + "test_work_group_suggested_local_size_3D for primes with " + "global_work_offset failed.", + -1); + log_info("test_work_group_suggested_local_size_3D primes with " + "global_work_offset passed\n"); + + return err; +} \ No newline at end of file -- cgit v1.2.3 From 4759e5cae0e3b3b6dd841fe28ad01f4b4f2478e6 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 11 Aug 2021 10:03:44 -0700 Subject: remove testing for scalar vloada_half (#1293) --- test_conformance/half/Test_vLoadHalf.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test_conformance/half/Test_vLoadHalf.cpp b/test_conformance/half/Test_vLoadHalf.cpp index 52867c25..5dfac7a3 100644 --- a/test_conformance/half/Test_vLoadHalf.cpp +++ b/test_conformance/half/Test_vLoadHalf.cpp @@ -37,14 +37,12 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) const char *vector_size_names[] = {"1", "2", "4", "8", "16", "3"}; int minVectorSize = kMinVectorSize; - // 
There is no aligned scalar vloada_half in CL 1.1 -#if ! defined( CL_VERSION_1_1 ) && ! defined(__APPLE__) - vlog("Note: testing vloada_half.\n"); - if (aligned && minVectorSize == 0) - minVectorSize = 1; -#endif - for( vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; vectorSize++) + // There is no aligned scalar vloada_half + if (aligned && minVectorSize == 0) minVectorSize = 1; + + for (vectorSize = minVectorSize; vectorSize < kLastVectorSizeToTest; + vectorSize++) { int effectiveVectorSize = g_arrVecSizes[vectorSize]; @@ -81,7 +79,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) "{\n" " size_t i = get_global_id(0);\n" " f[i] = vloada_half3( i, p );\n" - " ((__global float *)f)[4*i+3] = vloada_half(4*i+3,p);\n" + " ((__global float *)f)[4*i+3] = vload_half(4*i+3,p);\n" "}\n" }; -- cgit v1.2.3 From 1aa930957a3f7ca6df30b64f61d082f2359fe486 Mon Sep 17 00:00:00 2001 From: Feng Zou Date: Thu, 12 Aug 2021 01:04:21 +0800 Subject: Temporarily disable the test_kernel_attributes test case (#1297) * Temporarily disable the test_kernel_attributes test case Per OpenCL spec on CL_KERNEL_ATTRIBUTES, for kernels not created from OpenCL C source and the clCreateProgramWithSource API call the string returned from this query will be empty. But in test_kernel_attributes test, it read from bc binary and expect to get kernel attribute, which is not consistent with OpenCL spec. 
* Fix clang format issue --- test_conformance/spir/main.cpp | 73 ++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/test_conformance/spir/main.cpp b/test_conformance/spir/main.cpp index 3a18988c..06caf33b 100644 --- a/test_conformance/spir/main.cpp +++ b/test_conformance/spir/main.cpp @@ -6615,40 +6615,45 @@ struct sub_suite }; static const sub_suite spir_suites[] = { - {"api", "api", test_api}, - {"api_double", "api", test_api_double}, - {"atomics", "atomics", test_atomics}, - {"basic", "basic", test_basic}, - {"basic_double", "basic", test_basic_double}, - {"commonfns", "commonfns", test_commonfns}, - {"commonfns_double", "commonfns", test_commonfns_double}, - {"conversions", "conversions", test_conversions}, - {"conversions_double", "conversions", test_conversions_double}, - {"geometrics", "geometrics", test_geometrics}, - {"geometrics_double", "geometrics", test_geometrics_double}, - {"half", "half", test_half}, - {"half_double", "half", test_half_double}, - {"kernel_image_methods", "kernel_image_methods", test_kernel_image_methods}, - {"images_kernel_read_write", "images_kernel_read_write", test_images_kernel_read_write}, - {"images_samplerlessRead", "images_samplerlessRead", test_images_samplerless_read}, - {"integer_ops", "integer_ops", test_integer_ops}, - {"math_brute_force", "math_brute_force", test_math_brute_force}, - {"math_brute_force_double", "math_brute_force", test_math_brute_force_double}, - {"printf", "printf", test_printf}, - {"profiling", "profiling", test_profiling}, - {"relationals", "relationals", test_relationals}, - {"relationals_double", "relationals", test_relationals_double}, - {"select", "select", test_select}, - {"select_double", "select", test_select_double}, - {"vec_align", "vec_align", test_vec_align}, - {"vec_align_double", "vec_align", test_vec_align_double}, - {"vec_step", "vec_step", test_vec_step}, - {"vec_step_double", "vec_step", test_vec_step_double}, - 
{"compile_and_link", "compile_and_link", test_compile_and_link}, - {"sampler_enumeration", "sampler_enumeration", test_sampler_enumeration}, - {"enum_values", "enum_values", test_enum_values}, - {"kernel_attributes", "kernel_attributes", test_kernel_attributes}, - {"binary_type", "binary_type", test_binary_type}, + { "api", "api", test_api }, + { "api_double", "api", test_api_double }, + { "atomics", "atomics", test_atomics }, + { "basic", "basic", test_basic }, + { "basic_double", "basic", test_basic_double }, + { "commonfns", "commonfns", test_commonfns }, + { "commonfns_double", "commonfns", test_commonfns_double }, + { "conversions", "conversions", test_conversions }, + { "conversions_double", "conversions", test_conversions_double }, + { "geometrics", "geometrics", test_geometrics }, + { "geometrics_double", "geometrics", test_geometrics_double }, + { "half", "half", test_half }, + { "half_double", "half", test_half_double }, + { "kernel_image_methods", "kernel_image_methods", + test_kernel_image_methods }, + { "images_kernel_read_write", "images_kernel_read_write", + test_images_kernel_read_write }, + { "images_samplerlessRead", "images_samplerlessRead", + test_images_samplerless_read }, + { "integer_ops", "integer_ops", test_integer_ops }, + { "math_brute_force", "math_brute_force", test_math_brute_force }, + { "math_brute_force_double", "math_brute_force", + test_math_brute_force_double }, + { "printf", "printf", test_printf }, + { "profiling", "profiling", test_profiling }, + { "relationals", "relationals", test_relationals }, + { "relationals_double", "relationals", test_relationals_double }, + { "select", "select", test_select }, + { "select_double", "select", test_select_double }, + { "vec_align", "vec_align", test_vec_align }, + { "vec_align_double", "vec_align", test_vec_align_double }, + { "vec_step", "vec_step", test_vec_step }, + { "vec_step_double", "vec_step", test_vec_step_double }, + { "compile_and_link", "compile_and_link", 
test_compile_and_link }, + { "sampler_enumeration", "sampler_enumeration", test_sampler_enumeration }, + { "enum_values", "enum_values", test_enum_values }, + // {"kernel_attributes", "kernel_attributes", + // test_kernel_attributes}, // disabling temporarily, see GitHub #1284 + { "binary_type", "binary_type", test_binary_type }, }; -- cgit v1.2.3 From 6da9c6b68f9643a077f7281451b59f444a77a991 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Wed, 11 Aug 2021 19:06:10 +0200 Subject: Fix double free in c11_atomics tests for SVM allocations (#1286) * Only Clang format changes * Fix double free object for SVM allocations * Fix double free - review fixes --- test_conformance/c11_atomics/common.h | 2556 ++++++++++++++++++--------------- 1 file changed, 1381 insertions(+), 1175 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index bbcc68c6..d30259f0 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -28,10 +28,9 @@ #define MAX_DEVICE_THREADS (gHost ? 
0U : gMaxDeviceThreads) #define MAX_HOST_THREADS GetThreadCount() -#define EXECUTE_TEST(error, test)\ - error |= test;\ - if(error && !gContinueOnError)\ - return error; +#define EXECUTE_TEST(error, test) \ + error |= test; \ + if (error && !gContinueOnError) return error; enum TExplicitAtomicType { @@ -57,764 +56,918 @@ enum TExplicitMemoryScopeType MEMORY_SCOPE_ALL_SVM_DEVICES }; -extern bool gHost; // temporary flag for testing native host threads (test verification) +extern bool + gHost; // temporary flag for testing native host threads (test verification) extern bool gOldAPI; // temporary flag for testing with old API (OpenCL 1.2) extern bool gContinueOnError; // execute all cases even when errors detected -extern bool gNoGlobalVariables; // disable cases with global atomics in program scope +extern bool + gNoGlobalVariables; // disable cases with global atomics in program scope extern bool gNoGenericAddressSpace; // disable cases with generic address space extern bool gUseHostPtr; // use malloc/free instead of clSVMAlloc/clSVMFree extern bool gDebug; // print OpenCL kernel code -extern int gInternalIterations; // internal test iterations for atomic operation, sufficient to verify atomicity -extern int gMaxDeviceThreads; // maximum number of threads executed on OCL device +extern int gInternalIterations; // internal test iterations for atomic + // operation, sufficient to verify atomicity +extern int + gMaxDeviceThreads; // maximum number of threads executed on OCL device extern cl_device_atomic_capabilities gAtomicMemCap, gAtomicFenceCap; // atomic memory and fence capabilities for this device -extern const char *get_memory_order_type_name(TExplicitMemoryOrderType orderType); -extern const char *get_memory_scope_type_name(TExplicitMemoryScopeType scopeType); +extern const char * +get_memory_order_type_name(TExplicitMemoryOrderType orderType); +extern const char * +get_memory_scope_type_name(TExplicitMemoryScopeType scopeType); extern cl_int 
getSupportedMemoryOrdersAndScopes( cl_device_id device, std::vector &memoryOrders, std::vector &memoryScopes); -class AtomicTypeInfo -{ +class AtomicTypeInfo { public: - TExplicitAtomicType _type; - AtomicTypeInfo(TExplicitAtomicType type): _type(type) {} - cl_uint Size(cl_device_id device); - const char* AtomicTypeName(); - const char* RegularTypeName(); - const char* AddSubOperandTypeName(); - int IsSupported(cl_device_id device); + TExplicitAtomicType _type; + AtomicTypeInfo(TExplicitAtomicType type): _type(type) {} + cl_uint Size(cl_device_id device); + const char *AtomicTypeName(); + const char *RegularTypeName(); + const char *AddSubOperandTypeName(); + int IsSupported(cl_device_id device); }; -template -class AtomicTypeExtendedInfo : public AtomicTypeInfo -{ +template +class AtomicTypeExtendedInfo : public AtomicTypeInfo { public: - AtomicTypeExtendedInfo(TExplicitAtomicType type) : AtomicTypeInfo(type) {} - HostDataType MinValue(); - HostDataType MaxValue(); - HostDataType SpecialValue(cl_uchar x) - { - HostDataType tmp; - cl_uchar *ptr = (cl_uchar*)&tmp; - for(cl_uint i = 0; i < sizeof(HostDataType)/sizeof(cl_uchar); i++) - ptr[i] = x; - return tmp; - } - HostDataType SpecialValue(cl_ushort x) - { - HostDataType tmp; - cl_ushort *ptr = (cl_ushort*)&tmp; - for(cl_uint i = 0; i < sizeof(HostDataType)/sizeof(cl_ushort); i++) - ptr[i] = x; - return tmp; - } + AtomicTypeExtendedInfo(TExplicitAtomicType type): AtomicTypeInfo(type) {} + HostDataType MinValue(); + HostDataType MaxValue(); + HostDataType SpecialValue(cl_uchar x) + { + HostDataType tmp; + cl_uchar *ptr = (cl_uchar *)&tmp; + for (cl_uint i = 0; i < sizeof(HostDataType) / sizeof(cl_uchar); i++) + ptr[i] = x; + return tmp; + } + HostDataType SpecialValue(cl_ushort x) + { + HostDataType tmp; + cl_ushort *ptr = (cl_ushort *)&tmp; + for (cl_uint i = 0; i < sizeof(HostDataType) / sizeof(cl_ushort); i++) + ptr[i] = x; + return tmp; + } }; -class CTest { +class CTest { public: - virtual int 
Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) = 0; + virtual int Execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) = 0; }; -template -class CBasicTest : CTest -{ +template +class CBasicTest : CTest { public: - typedef struct { - CBasicTest *test; - cl_uint tid; - cl_uint threadCount; - volatile HostAtomicType *destMemory; - HostDataType *oldValues; - } THostThreadContext; - static cl_int HostThreadFunction(cl_uint job_id, cl_uint thread_id, void *userInfo) - { - THostThreadContext *threadContext = ((THostThreadContext*)userInfo)+job_id; - threadContext->test->HostFunction(threadContext->tid, threadContext->threadCount, threadContext->destMemory, threadContext->oldValues); - return 0; - } - CBasicTest(TExplicitAtomicType dataType, bool useSVM) : CTest(), - _maxDeviceThreads(MAX_DEVICE_THREADS), - _dataType(dataType), _useSVM(useSVM), _startValue(255), - _localMemory(false), _declaredInProgram(false), - _usedInFunction(false), _genericAddrSpace(false), - _oldValueCheck(true), _localRefValues(false), - _maxGroupSize(0), _passCount(0), _iterations(gInternalIterations) - { - } - virtual ~CBasicTest() - { - if(_passCount) - log_info(" %u tests executed successfully for %s\n", _passCount, DataType().AtomicTypeName()); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return 1; - } - virtual cl_uint NumNonAtomicVariablesPerThread() - { - return 1; - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - return false; - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - return false; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - return false; - } - virtual std::string PragmaHeader(cl_device_id deviceID); - virtual std::string ProgramHeader(cl_uint 
maxNumDestItems); - virtual std::string FunctionCode(); - virtual std::string KernelCode(cl_uint maxNumDestItems); - virtual std::string ProgramCore() = 0; - virtual std::string SingleTestName() - { - std::string testName = LocalMemory() ? "local" : "global"; - testName += " "; - testName += DataType().AtomicTypeName(); - if(DeclaredInProgram()) - { - testName += " declared in program"; - } - if(DeclaredInProgram() && UsedInFunction()) - testName += ","; - if(UsedInFunction()) - { - testName += " used in "; - if(GenericAddrSpace()) - testName += "generic "; - testName += "function"; - } - return testName; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue); - int ExecuteForEachPointerType(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - int error = 0; - UsedInFunction(false); - EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); - UsedInFunction(true); - GenericAddrSpace(false); - EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); - GenericAddrSpace(true); - EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); - GenericAddrSpace(false); - return error; - } - int ExecuteForEachDeclarationType(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - int error = 0; - DeclaredInProgram(false); - EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue)); - if(!UseSVM()) - { - DeclaredInProgram(true); - EXECUTE_TEST(error, ExecuteForEachPointerType(deviceID, context, queue)); - } - return error; - } - virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - int error = 0; - if(_maxDeviceThreads > 0 && !UseSVM()) - { - LocalMemory(true); - EXECUTE_TEST(error, ExecuteForEachDeclarationType(deviceID, context, queue)); - } - if(_maxDeviceThreads+MaxHostThreads() > 0) - { - LocalMemory(false); - EXECUTE_TEST(error, ExecuteForEachDeclarationType(deviceID, context, queue)); - } 
- return error; - } - virtual int Execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) - { - if(sizeof(HostAtomicType) != DataType().Size(deviceID)) - { - log_info("Invalid test: Host atomic type size (%u) is different than OpenCL type size (%u)\n", (cl_uint)sizeof(HostAtomicType), DataType().Size(deviceID)); - return -1; - } - if(sizeof(HostAtomicType) != sizeof(HostDataType)) - { - log_info("Invalid test: Host atomic type size (%u) is different than corresponding type size (%u)\n", (cl_uint)sizeof(HostAtomicType), (cl_uint)sizeof(HostDataType)); - return -1; - } - // Verify we can run first - if(UseSVM() && !gUseHostPtr) - { - cl_device_svm_capabilities caps; - cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, sizeof(caps), &caps, 0); - test_error(error, "clGetDeviceInfo failed"); - if((caps & CL_DEVICE_SVM_ATOMICS) == 0) - { - log_info("\t%s - SVM_ATOMICS not supported\n", DataType().AtomicTypeName()); - // implicit pass + typedef struct + { + CBasicTest *test; + cl_uint tid; + cl_uint threadCount; + volatile HostAtomicType *destMemory; + HostDataType *oldValues; + } THostThreadContext; + static cl_int HostThreadFunction(cl_uint job_id, cl_uint thread_id, + void *userInfo) + { + THostThreadContext *threadContext = + ((THostThreadContext *)userInfo) + job_id; + threadContext->test->HostFunction( + threadContext->tid, threadContext->threadCount, + threadContext->destMemory, threadContext->oldValues); return 0; - } } - if(!DataType().IsSupported(deviceID)) + CBasicTest(TExplicitAtomicType dataType, bool useSVM) + : CTest(), _maxDeviceThreads(MAX_DEVICE_THREADS), _dataType(dataType), + _useSVM(useSVM), _startValue(255), _localMemory(false), + _declaredInProgram(false), _usedInFunction(false), + _genericAddrSpace(false), _oldValueCheck(true), + _localRefValues(false), _maxGroupSize(0), _passCount(0), + _iterations(gInternalIterations) + {} + virtual ~CBasicTest() + { + if (_passCount) + log_info(" %u tests 
executed successfully for %s\n", _passCount, + DataType().AtomicTypeName()); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return 1; + } + virtual cl_uint NumNonAtomicVariablesPerThread() { return 1; } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + return false; + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + return false; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) { - log_info("\t%s not supported\n", DataType().AtomicTypeName()); - // implicit pass or host test (debug feature) - if(UseSVM()) + return false; + } + virtual std::string PragmaHeader(cl_device_id deviceID); + virtual std::string ProgramHeader(cl_uint maxNumDestItems); + virtual std::string FunctionCode(); + virtual std::string KernelCode(cl_uint maxNumDestItems); + virtual std::string ProgramCore() = 0; + virtual std::string SingleTestName() + { + std::string testName = LocalMemory() ? 
"local" : "global"; + testName += " "; + testName += DataType().AtomicTypeName(); + if (DeclaredInProgram()) + { + testName += " declared in program"; + } + if (DeclaredInProgram() && UsedInFunction()) testName += ","; + if (UsedInFunction()) + { + testName += " used in "; + if (GenericAddrSpace()) testName += "generic "; + testName += "function"; + } + return testName; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue); + int ExecuteForEachPointerType(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + int error = 0; + UsedInFunction(false); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + UsedInFunction(true); + GenericAddrSpace(false); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + GenericAddrSpace(true); + EXECUTE_TEST(error, ExecuteSingleTest(deviceID, context, queue)); + GenericAddrSpace(false); + return error; + } + int ExecuteForEachDeclarationType(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + int error = 0; + DeclaredInProgram(false); + EXECUTE_TEST(error, + ExecuteForEachPointerType(deviceID, context, queue)); + if (!UseSVM()) + { + DeclaredInProgram(true); + EXECUTE_TEST(error, + ExecuteForEachPointerType(deviceID, context, queue)); + } + return error; + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + int error = 0; + if (_maxDeviceThreads > 0 && !UseSVM()) + { + LocalMemory(true); + EXECUTE_TEST( + error, ExecuteForEachDeclarationType(deviceID, context, queue)); + } + if (_maxDeviceThreads + MaxHostThreads() > 0) + { + LocalMemory(false); + EXECUTE_TEST( + error, ExecuteForEachDeclarationType(deviceID, context, queue)); + } + return error; + } + virtual int Execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) + { + if (sizeof(HostAtomicType) != DataType().Size(deviceID)) + { + log_info("Invalid 
test: Host atomic type size (%u) is different " + "than OpenCL type size (%u)\n", + (cl_uint)sizeof(HostAtomicType), + DataType().Size(deviceID)); + return -1; + } + if (sizeof(HostAtomicType) != sizeof(HostDataType)) + { + log_info("Invalid test: Host atomic type size (%u) is different " + "than corresponding type size (%u)\n", + (cl_uint)sizeof(HostAtomicType), + (cl_uint)sizeof(HostDataType)); + return -1; + } + // Verify we can run first + if (UseSVM() && !gUseHostPtr) + { + cl_device_svm_capabilities caps; + cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_SVM_CAPABILITIES, + sizeof(caps), &caps, 0); + test_error(error, "clGetDeviceInfo failed"); + if ((caps & CL_DEVICE_SVM_ATOMICS) == 0) + { + log_info("\t%s - SVM_ATOMICS not supported\n", + DataType().AtomicTypeName()); + // implicit pass + return 0; + } + } + if (!DataType().IsSupported(deviceID)) + { + log_info("\t%s not supported\n", DataType().AtomicTypeName()); + // implicit pass or host test (debug feature) + if (UseSVM()) return 0; + _maxDeviceThreads = 0; + } + if (_maxDeviceThreads + MaxHostThreads() == 0) return 0; + return ExecuteForEachParameterSet(deviceID, context, queue); + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + log_info("Empty thread function %u\n", (cl_uint)tid); + } + AtomicTypeExtendedInfo DataType() const + { + return AtomicTypeExtendedInfo(_dataType); + } + cl_uint _maxDeviceThreads; + virtual cl_uint MaxHostThreads() + { + if (UseSVM() || gHost) + return MAX_HOST_THREADS; + else + return 0; + } + + int CheckCapabilities(TExplicitMemoryScopeType memoryScope, + TExplicitMemoryOrderType memoryOrder) + { + /* + Differentiation between atomic fence and other atomic operations + does not need to occur here. + + The initialisation of this test checks that the minimum required + capabilities are supported by this device. 
+ + The following switches allow the test to skip if optional + capabilites are not supported by the device. + */ + switch (memoryScope) + { + case MEMORY_SCOPE_EMPTY: { + break; + } + case MEMORY_SCOPE_WORK_GROUP: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_SCOPE_DEVICE: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_SCOPE_ALL_DEVICES: // fallthough + case MEMORY_SCOPE_ALL_SVM_DEVICES: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + default: { + log_info("Invalid memory scope\n"); + break; + } + } + + switch (memoryOrder) + { + case MEMORY_ORDER_EMPTY: { + break; + } + case MEMORY_ORDER_RELAXED: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_ORDER_ACQUIRE: + case MEMORY_ORDER_RELEASE: + case MEMORY_ORDER_ACQ_REL: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + case MEMORY_ORDER_SEQ_CST: { + if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) == 0) + { + return TEST_SKIPPED_ITSELF; + } + break; + } + default: { + log_info("Invalid memory order\n"); + break; + } + } + return 0; - _maxDeviceThreads = 0; - } - if(_maxDeviceThreads+MaxHostThreads() == 0) - return 0; - return ExecuteForEachParameterSet(deviceID, context, queue); - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - log_info("Empty thread function %u\n", (cl_uint)tid); - } - AtomicTypeExtendedInfo DataType() const - { - return AtomicTypeExtendedInfo(_dataType); - } - cl_uint _maxDeviceThreads; - virtual cl_uint MaxHostThreads() - { - if(UseSVM() || gHost) - return MAX_HOST_THREADS; - else - return 0; - } - - int 
CheckCapabilities(TExplicitMemoryScopeType memoryScope, - TExplicitMemoryOrderType memoryOrder) - { - /* - Differentiation between atomic fence and other atomic operations - does not need to occur here. - - The initialisation of this test checks that the minimum required - capabilities are supported by this device. - - The following switches allow the test to skip if optional capabilites - are not supported by the device. - */ - switch (memoryScope) - { - case MEMORY_SCOPE_EMPTY: { - break; - } - case MEMORY_SCOPE_WORK_GROUP: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_SCOPE_DEVICE: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_SCOPE_ALL_DEVICES: // fallthough - case MEMORY_SCOPE_ALL_SVM_DEVICES: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - default: { - log_info("Invalid memory scope\n"); - break; - } - } - - switch (memoryOrder) - { - case MEMORY_ORDER_EMPTY: { - break; - } - case MEMORY_ORDER_RELAXED: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_RELAXED) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_ORDER_ACQUIRE: - case MEMORY_ORDER_RELEASE: - case MEMORY_ORDER_ACQ_REL: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - case MEMORY_ORDER_SEQ_CST: { - if ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_SEQ_CST) == 0) - { - return TEST_SKIPPED_ITSELF; - } - break; - } - default: { - log_info("Invalid memory order\n"); - break; - } - } - - return 0; - } - virtual bool SVMDataBufferAllSVMConsistent() {return false;} - bool UseSVM() {return _useSVM;} - void StartValue(HostDataType startValue) {_startValue = startValue;} - HostDataType StartValue() {return _startValue;} - void LocalMemory(bool local) {_localMemory = local;} - bool LocalMemory() 
{return _localMemory;} - void DeclaredInProgram(bool declaredInProgram) {_declaredInProgram = declaredInProgram;} - bool DeclaredInProgram() {return _declaredInProgram;} - void UsedInFunction(bool local) {_usedInFunction = local;} - bool UsedInFunction() {return _usedInFunction;} - void GenericAddrSpace(bool genericAddrSpace) {_genericAddrSpace = genericAddrSpace;} - bool GenericAddrSpace() {return _genericAddrSpace;} - void OldValueCheck(bool check) {_oldValueCheck = check;} - bool OldValueCheck() {return _oldValueCheck;} - void LocalRefValues(bool localRefValues) {_localRefValues = localRefValues;} - bool LocalRefValues() {return _localRefValues;} - void MaxGroupSize(cl_uint maxGroupSize) {_maxGroupSize = maxGroupSize;} - cl_uint MaxGroupSize() {return _maxGroupSize;} - void CurrentGroupSize(cl_uint currentGroupSize) - { - if(MaxGroupSize() && MaxGroupSize() < currentGroupSize) - _currentGroupSize = MaxGroupSize(); - else - _currentGroupSize = currentGroupSize; - } - cl_uint CurrentGroupSize() {return _currentGroupSize;} - virtual cl_uint CurrentGroupNum(cl_uint threadCount) - { - if(threadCount == 0) - return 0; - if(LocalMemory()) - return 1; - return threadCount/CurrentGroupSize(); - } - cl_int Iterations() {return _iterations;} - std::string IterationsStr() {std::stringstream ss; ss << _iterations; return ss.str();} + } + virtual bool SVMDataBufferAllSVMConsistent() { return false; } + bool UseSVM() { return _useSVM; } + void StartValue(HostDataType startValue) { _startValue = startValue; } + HostDataType StartValue() { return _startValue; } + void LocalMemory(bool local) { _localMemory = local; } + bool LocalMemory() { return _localMemory; } + void DeclaredInProgram(bool declaredInProgram) + { + _declaredInProgram = declaredInProgram; + } + bool DeclaredInProgram() { return _declaredInProgram; } + void UsedInFunction(bool local) { _usedInFunction = local; } + bool UsedInFunction() { return _usedInFunction; } + void GenericAddrSpace(bool genericAddrSpace) + { 
+ _genericAddrSpace = genericAddrSpace; + } + bool GenericAddrSpace() { return _genericAddrSpace; } + void OldValueCheck(bool check) { _oldValueCheck = check; } + bool OldValueCheck() { return _oldValueCheck; } + void LocalRefValues(bool localRefValues) + { + _localRefValues = localRefValues; + } + bool LocalRefValues() { return _localRefValues; } + void MaxGroupSize(cl_uint maxGroupSize) { _maxGroupSize = maxGroupSize; } + cl_uint MaxGroupSize() { return _maxGroupSize; } + void CurrentGroupSize(cl_uint currentGroupSize) + { + if (MaxGroupSize() && MaxGroupSize() < currentGroupSize) + _currentGroupSize = MaxGroupSize(); + else + _currentGroupSize = currentGroupSize; + } + cl_uint CurrentGroupSize() { return _currentGroupSize; } + virtual cl_uint CurrentGroupNum(cl_uint threadCount) + { + if (threadCount == 0) return 0; + if (LocalMemory()) return 1; + return threadCount / CurrentGroupSize(); + } + cl_int Iterations() { return _iterations; } + std::string IterationsStr() + { + std::stringstream ss; + ss << _iterations; + return ss.str(); + } + private: - const TExplicitAtomicType _dataType; - const bool _useSVM; - HostDataType _startValue; - bool _localMemory; - bool _declaredInProgram; - bool _usedInFunction; - bool _genericAddrSpace; - bool _oldValueCheck; - bool _localRefValues; - cl_uint _maxGroupSize; - cl_uint _currentGroupSize; - cl_uint _passCount; - const cl_int _iterations; + const TExplicitAtomicType _dataType; + const bool _useSVM; + HostDataType _startValue; + bool _localMemory; + bool _declaredInProgram; + bool _usedInFunction; + bool _genericAddrSpace; + bool _oldValueCheck; + bool _localRefValues; + cl_uint _maxGroupSize; + cl_uint _currentGroupSize; + cl_uint _passCount; + const cl_int _iterations; }; -template -class CBasicTestMemOrderScope : public CBasicTest -{ +template +class CBasicTestMemOrderScope + : public CBasicTest { public: - using CBasicTest::LocalMemory; - using CBasicTest::MaxGroupSize; - using CBasicTest::CheckCapabilities; - 
CBasicTestMemOrderScope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTest(dataType, useSVM) - { - } - virtual std::string ProgramHeader(cl_uint maxNumDestItems) - { - std::string header; - if(gOldAPI) - { - std::string s = MemoryScope() == MEMORY_SCOPE_EMPTY ? "" : ",s"; - header += - "#define atomic_store_explicit(x,y,o"+s+") atomic_store(x,y)\n" - "#define atomic_load_explicit(x,o"+s+") atomic_load(x)\n" - "#define atomic_exchange_explicit(x,y,o"+s+") atomic_exchange(x,y)\n" - "#define atomic_compare_exchange_strong_explicit(x,y,z,os,of"+s+") atomic_compare_exchange_strong(x,y,z)\n" - "#define atomic_compare_exchange_weak_explicit(x,y,z,os,of"+s+") atomic_compare_exchange_weak(x,y,z)\n" - "#define atomic_fetch_add_explicit(x,y,o"+s+") atomic_fetch_add(x,y)\n" - "#define atomic_fetch_sub_explicit(x,y,o"+s+") atomic_fetch_sub(x,y)\n" - "#define atomic_fetch_or_explicit(x,y,o"+s+") atomic_fetch_or(x,y)\n" - "#define atomic_fetch_xor_explicit(x,y,o"+s+") atomic_fetch_xor(x,y)\n" - "#define atomic_fetch_and_explicit(x,y,o"+s+") atomic_fetch_and(x,y)\n" - "#define atomic_fetch_min_explicit(x,y,o"+s+") atomic_fetch_min(x,y)\n" - "#define atomic_fetch_max_explicit(x,y,o"+s+") atomic_fetch_max(x,y)\n" - "#define atomic_flag_test_and_set_explicit(x,o"+s+") atomic_flag_test_and_set(x)\n" - "#define atomic_flag_clear_explicit(x,o"+s+") atomic_flag_clear(x)\n"; - } - return header+CBasicTest::ProgramHeader(maxNumDestItems); - } - virtual std::string SingleTestName() - { - std::string testName = CBasicTest::SingleTestName(); - if(MemoryOrder() != MEMORY_ORDER_EMPTY) - { - testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder())).substr(sizeof("memory")); - } - if(MemoryScope() != MEMORY_SCOPE_EMPTY) - { - testName += std::string(", ")+std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory")); - } - return testName; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue 
queue) - { - if(LocalMemory() && - MemoryScope() != MEMORY_SCOPE_EMPTY && - MemoryScope() != MEMORY_SCOPE_WORK_GROUP) //memory scope should only be used for global memory - return 0; - if(MemoryScope() == MEMORY_SCOPE_DEVICE) - MaxGroupSize(16); // increase number of groups by forcing smaller group size - else - MaxGroupSize(0); // group size limited by device capabilities - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - return CBasicTest::ExecuteSingleTest(deviceID, context, queue); - } - virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - // repeat test for each reasonable memory order/scope combination - std::vector memoryOrder; - std::vector memoryScope; - int error = 0; - - // For OpenCL-3.0 and later some orderings and scopes are optional, so here - // we query for the supported ones. - test_error_ret( - getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope), - "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); - - for(unsigned oi = 0; oi < memoryOrder.size(); oi++) - { - for(unsigned si = 0; si < memoryScope.size(); si++) - { - if(memoryOrder[oi] == MEMORY_ORDER_EMPTY && memoryScope[si] != MEMORY_SCOPE_EMPTY) - continue; - MemoryOrder(memoryOrder[oi]); - MemoryScope(memoryScope[si]); - EXECUTE_TEST(error, (CBasicTest::ExecuteForEachParameterSet(deviceID, context, queue))); - } - } - return error; - } - void MemoryOrder(TExplicitMemoryOrderType memoryOrder) {_memoryOrder = memoryOrder;} - TExplicitMemoryOrderType MemoryOrder() {return _memoryOrder;} - std::string MemoryOrderStr() - { - if(MemoryOrder() != MEMORY_ORDER_EMPTY) - return std::string(", ")+get_memory_order_type_name(MemoryOrder()); - return ""; - } - void MemoryScope(TExplicitMemoryScopeType memoryScope) {_memoryScope = memoryScope;} - TExplicitMemoryScopeType MemoryScope() {return _memoryScope;} - std::string MemoryScopeStr() - { - 
if(MemoryScope() != MEMORY_SCOPE_EMPTY) - return std::string(", ")+get_memory_scope_type_name(MemoryScope()); - return ""; - } - std::string MemoryOrderScopeStr() - { - return MemoryOrderStr()+MemoryScopeStr(); - } - virtual cl_uint CurrentGroupNum(cl_uint threadCount) - { - if(MemoryScope() == MEMORY_SCOPE_WORK_GROUP) - return 1; - return CBasicTest::CurrentGroupNum(threadCount); - } - virtual cl_uint MaxHostThreads() - { - // block host threads execution for memory scope different than - // memory_scope_all_svm_devices - if (MemoryScope() == MEMORY_SCOPE_ALL_DEVICES - || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost) - { - return CBasicTest::MaxHostThreads(); - } - else - { - return 0; - } - } + using CBasicTest::LocalMemory; + using CBasicTest::MaxGroupSize; + using CBasicTest::CheckCapabilities; + CBasicTestMemOrderScope(TExplicitAtomicType dataType, bool useSVM = false) + : CBasicTest(dataType, useSVM) + {} + virtual std::string ProgramHeader(cl_uint maxNumDestItems) + { + std::string header; + if (gOldAPI) + { + std::string s = MemoryScope() == MEMORY_SCOPE_EMPTY ? 
"" : ",s"; + header += "#define atomic_store_explicit(x,y,o" + s + + ") atomic_store(x,y)\n" + "#define atomic_load_explicit(x,o" + + s + + ") atomic_load(x)\n" + "#define atomic_exchange_explicit(x,y,o" + + s + + ") atomic_exchange(x,y)\n" + "#define atomic_compare_exchange_strong_explicit(x,y,z,os,of" + + s + + ") atomic_compare_exchange_strong(x,y,z)\n" + "#define atomic_compare_exchange_weak_explicit(x,y,z,os,of" + + s + + ") atomic_compare_exchange_weak(x,y,z)\n" + "#define atomic_fetch_add_explicit(x,y,o" + + s + + ") atomic_fetch_add(x,y)\n" + "#define atomic_fetch_sub_explicit(x,y,o" + + s + + ") atomic_fetch_sub(x,y)\n" + "#define atomic_fetch_or_explicit(x,y,o" + + s + + ") atomic_fetch_or(x,y)\n" + "#define atomic_fetch_xor_explicit(x,y,o" + + s + + ") atomic_fetch_xor(x,y)\n" + "#define atomic_fetch_and_explicit(x,y,o" + + s + + ") atomic_fetch_and(x,y)\n" + "#define atomic_fetch_min_explicit(x,y,o" + + s + + ") atomic_fetch_min(x,y)\n" + "#define atomic_fetch_max_explicit(x,y,o" + + s + + ") atomic_fetch_max(x,y)\n" + "#define atomic_flag_test_and_set_explicit(x,o" + + s + + ") atomic_flag_test_and_set(x)\n" + "#define atomic_flag_clear_explicit(x,o" + + s + ") atomic_flag_clear(x)\n"; + } + return header + + CBasicTest::ProgramHeader( + maxNumDestItems); + } + virtual std::string SingleTestName() + { + std::string testName = + CBasicTest::SingleTestName(); + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + { + testName += std::string(", ") + + std::string(get_memory_order_type_name(MemoryOrder())) + .substr(sizeof("memory")); + } + if (MemoryScope() != MEMORY_SCOPE_EMPTY) + { + testName += std::string(", ") + + std::string(get_memory_scope_type_name(MemoryScope())) + .substr(sizeof("memory")); + } + return testName; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (LocalMemory() && MemoryScope() != MEMORY_SCOPE_EMPTY + && MemoryScope() + != MEMORY_SCOPE_WORK_GROUP) // memory scope should only 
be used + // for global memory + return 0; + if (MemoryScope() == MEMORY_SCOPE_DEVICE) + MaxGroupSize( + 16); // increase number of groups by forcing smaller group size + else + MaxGroupSize(0); // group size limited by device capabilities + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTest::ExecuteSingleTest( + deviceID, context, queue); + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + // repeat test for each reasonable memory order/scope combination + std::vector memoryOrder; + std::vector memoryScope; + int error = 0; + + // For OpenCL-3.0 and later some orderings and scopes are optional, so + // here we query for the supported ones. + test_error_ret(getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, + memoryScope), + "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); + + for (unsigned oi = 0; oi < memoryOrder.size(); oi++) + { + for (unsigned si = 0; si < memoryScope.size(); si++) + { + if (memoryOrder[oi] == MEMORY_ORDER_EMPTY + && memoryScope[si] != MEMORY_SCOPE_EMPTY) + continue; + MemoryOrder(memoryOrder[oi]); + MemoryScope(memoryScope[si]); + EXECUTE_TEST( + error, + (CBasicTest:: + ExecuteForEachParameterSet(deviceID, context, queue))); + } + } + return error; + } + void MemoryOrder(TExplicitMemoryOrderType memoryOrder) + { + _memoryOrder = memoryOrder; + } + TExplicitMemoryOrderType MemoryOrder() { return _memoryOrder; } + std::string MemoryOrderStr() + { + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + return std::string(", ") + + get_memory_order_type_name(MemoryOrder()); + return ""; + } + void MemoryScope(TExplicitMemoryScopeType memoryScope) + { + _memoryScope = memoryScope; + } + TExplicitMemoryScopeType MemoryScope() { return _memoryScope; } + std::string MemoryScopeStr() + { + if (MemoryScope() != MEMORY_SCOPE_EMPTY) + return std::string(", ") + + 
get_memory_scope_type_name(MemoryScope()); + return ""; + } + std::string MemoryOrderScopeStr() + { + return MemoryOrderStr() + MemoryScopeStr(); + } + virtual cl_uint CurrentGroupNum(cl_uint threadCount) + { + if (MemoryScope() == MEMORY_SCOPE_WORK_GROUP) return 1; + return CBasicTest::CurrentGroupNum( + threadCount); + } + virtual cl_uint MaxHostThreads() + { + // block host threads execution for memory scope different than + // memory_scope_all_svm_devices + if (MemoryScope() == MEMORY_SCOPE_ALL_DEVICES + || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES || gHost) + { + return CBasicTest::MaxHostThreads(); + } + else + { + return 0; + } + } + private: - TExplicitMemoryOrderType _memoryOrder; - TExplicitMemoryScopeType _memoryScope; + TExplicitMemoryOrderType _memoryOrder; + TExplicitMemoryScopeType _memoryScope; }; -template -class CBasicTestMemOrder2Scope : public CBasicTestMemOrderScope -{ +template +class CBasicTestMemOrder2Scope + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::LocalMemory; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryOrderStr; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTest::CheckCapabilities; - - CBasicTestMemOrder2Scope(TExplicitAtomicType dataType, bool useSVM = false) : CBasicTestMemOrderScope(dataType, useSVM) - { - } - virtual std::string SingleTestName() - { - std::string testName = CBasicTest::SingleTestName(); - if(MemoryOrder() != MEMORY_ORDER_EMPTY) - testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder())).substr(sizeof("memory")); - if(MemoryOrder2() != MEMORY_ORDER_EMPTY) - testName += std::string(", ")+std::string(get_memory_order_type_name(MemoryOrder2())).substr(sizeof("memory")); - if(MemoryScope() != MEMORY_SCOPE_EMPTY) - testName += std::string(", ")+std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory")); - return testName; - } - virtual 
int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - // repeat test for each reasonable memory order/scope combination - std::vector memoryOrder; - std::vector memoryScope; - int error = 0; - - // For OpenCL-3.0 and later some orderings and scopes are optional, so here - // we query for the supported ones. - test_error_ret( - getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, memoryScope), - "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); - - for(unsigned oi = 0; oi < memoryOrder.size(); oi++) - { - for(unsigned o2i = 0; o2i < memoryOrder.size(); o2i++) - { - for(unsigned si = 0; si < memoryScope.size(); si++) + using CBasicTestMemOrderScope::LocalMemory; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryOrderStr; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTest::CheckCapabilities; + + CBasicTestMemOrder2Scope(TExplicitAtomicType dataType, bool useSVM = false) + : CBasicTestMemOrderScope(dataType, + useSVM) + {} + virtual std::string SingleTestName() + { + std::string testName = + CBasicTest::SingleTestName(); + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + testName += std::string(", ") + + std::string(get_memory_order_type_name(MemoryOrder())) + .substr(sizeof("memory")); + if (MemoryOrder2() != MEMORY_ORDER_EMPTY) + testName += std::string(", ") + + std::string(get_memory_order_type_name(MemoryOrder2())) + .substr(sizeof("memory")); + if (MemoryScope() != MEMORY_SCOPE_EMPTY) + testName += std::string(", ") + + std::string(get_memory_scope_type_name(MemoryScope())) + .substr(sizeof("memory")); + return testName; + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + // repeat test for each reasonable memory order/scope combination + std::vector memoryOrder; + std::vector memoryScope; + int error = 0; + + // For OpenCL-3.0 and later some 
orderings and scopes are optional, so + // here we query for the supported ones. + test_error_ret(getSupportedMemoryOrdersAndScopes(deviceID, memoryOrder, + memoryScope), + "getSupportedMemoryOrdersAndScopes failed\n", TEST_FAIL); + + for (unsigned oi = 0; oi < memoryOrder.size(); oi++) { - if((memoryOrder[oi] == MEMORY_ORDER_EMPTY || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) - && memoryOrder[oi] != memoryOrder[o2i]) - continue; // both memory order arguments must be set (or none) - if((memoryOrder[oi] == MEMORY_ORDER_EMPTY || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) - && memoryScope[si] != MEMORY_SCOPE_EMPTY) - continue; // memory scope without memory order is not allowed - MemoryOrder(memoryOrder[oi]); - MemoryOrder2(memoryOrder[o2i]); - MemoryScope(memoryScope[si]); - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) - == TEST_SKIPPED_ITSELF) - continue; // skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder2()) - == TEST_SKIPPED_ITSELF) - continue; // skip test - not applicable - - EXECUTE_TEST(error, (CBasicTest::ExecuteForEachParameterSet(deviceID, context, queue))); + for (unsigned o2i = 0; o2i < memoryOrder.size(); o2i++) + { + for (unsigned si = 0; si < memoryScope.size(); si++) + { + if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY + || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) + && memoryOrder[oi] != memoryOrder[o2i]) + continue; // both memory order arguments must be set (or + // none) + if ((memoryOrder[oi] == MEMORY_ORDER_EMPTY + || memoryOrder[o2i] == MEMORY_ORDER_EMPTY) + && memoryScope[si] != MEMORY_SCOPE_EMPTY) + continue; // memory scope without memory order is not + // allowed + MemoryOrder(memoryOrder[oi]); + MemoryOrder2(memoryOrder[o2i]); + MemoryScope(memoryScope[si]); + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + continue; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder2()) + == TEST_SKIPPED_ITSELF) + continue; // skip test - not applicable + + 
EXECUTE_TEST(error, + (CBasicTest:: + ExecuteForEachParameterSet( + deviceID, context, queue))); + } + } } - } - } - return error; - } - void MemoryOrder2(TExplicitMemoryOrderType memoryOrderFail) {_memoryOrder2 = memoryOrderFail;} - TExplicitMemoryOrderType MemoryOrder2() {return _memoryOrder2;} - std::string MemoryOrderFailStr() - { - if(MemoryOrder2() != MEMORY_ORDER_EMPTY) - return std::string(", ")+get_memory_order_type_name(MemoryOrder2()); - return ""; - } - std::string MemoryOrderScope() - { - return MemoryOrderStr()+MemoryOrderFailStr()+MemoryScopeStr(); - } + return error; + } + void MemoryOrder2(TExplicitMemoryOrderType memoryOrderFail) + { + _memoryOrder2 = memoryOrderFail; + } + TExplicitMemoryOrderType MemoryOrder2() { return _memoryOrder2; } + std::string MemoryOrderFailStr() + { + if (MemoryOrder2() != MEMORY_ORDER_EMPTY) + return std::string(", ") + + get_memory_order_type_name(MemoryOrder2()); + return ""; + } + std::string MemoryOrderScope() + { + return MemoryOrderStr() + MemoryOrderFailStr() + MemoryScopeStr(); + } + private: - TExplicitMemoryOrderType _memoryOrder2; + TExplicitMemoryOrderType _memoryOrder2; }; -template -std::string CBasicTest::PragmaHeader(cl_device_id deviceID) +template +std::string +CBasicTest::PragmaHeader(cl_device_id deviceID) { - std::string pragma; - - if(gOldAPI) - { - pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n"; - } - // Create the pragma lines for this kernel - if(DataType().Size(deviceID) == 8) - { - pragma += "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"; - pragma += "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"; - } - if(_dataType == TYPE_ATOMIC_DOUBLE) - pragma += "#pragma OPENCL 
EXTENSION cl_khr_fp64 : enable\n"; - return pragma; + std::string pragma; + + if (gOldAPI) + { + pragma += "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : " + "enable\n"; + pragma += "#pragma OPENCL EXTENSION " + "cl_khr_local_int32_extended_atomics : enable\n"; + pragma += "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : " + "enable\n"; + pragma += "#pragma OPENCL EXTENSION " + "cl_khr_global_int32_extended_atomics : enable\n"; + } + // Create the pragma lines for this kernel + if (DataType().Size(deviceID) == 8) + { + pragma += + "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n"; + pragma += + "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"; + } + if (_dataType == TYPE_ATOMIC_DOUBLE) + pragma += "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + return pragma; } -template -std::string CBasicTest::ProgramHeader(cl_uint maxNumDestItems) +template +std::string +CBasicTest::ProgramHeader(cl_uint maxNumDestItems) { - // Create the program header - std::string header; - std::string aTypeName = DataType().AtomicTypeName(); - std::string cTypeName = DataType().RegularTypeName(); - std::string argListForKernel; - std::string argListForFunction; - std::string argListNoTypes; - std::string functionPrototype; - std::string addressSpace = LocalMemory() ? 
"__local " : "__global "; - - if(gOldAPI) - { - header += std::string("#define ")+aTypeName+" "+cTypeName+"\n" - "#define atomic_store(x,y) (*(x) = y)\n" - "#define atomic_load(x) (*(x))\n" - "#define ATOMIC_VAR_INIT(x) (x)\n" - "#define ATOMIC_FLAG_INIT 0\n" - "#define atomic_init(x,y) atomic_store(x,y)\n"; - if(aTypeName == "atomic_float") - header += "#define atomic_exchange(x,y) atomic_xchg(x,y)\n"; - else if(aTypeName == "atomic_double") - header += "double atomic_exchange(volatile "+addressSpace+"atomic_double *x, double y)\n" - "{\n" - " long tmp = *(long*)&y, res;\n" - " volatile "+addressSpace+"long *tmpA = (volatile "+addressSpace+"long)x;\n" - " res = atom_xchg(tmpA,tmp);\n" - " return *(double*)&res;\n" - "}\n"; - else - header += "#define atomic_exchange(x,y) atom_xchg(x,y)\n"; - if(aTypeName != "atomic_float" && aTypeName != "atomic_double") - header += - "bool atomic_compare_exchange_strong(volatile "+addressSpace+" "+aTypeName+" *a, "+cTypeName+" *expected, "+cTypeName+" desired)\n" - "{\n" - " "+cTypeName+" old = atom_cmpxchg(a, *expected, desired);\n" - " if(old == *expected)\n" - " return true;\n" - " *expected = old;\n" - " return false;\n" - "}\n" - "#define atomic_compare_exchange_weak atomic_compare_exchange_strong\n"; - header += - "#define atomic_fetch_add(x,y) atom_add(x,y)\n" - "#define atomic_fetch_sub(x,y) atom_sub(x,y)\n" - "#define atomic_fetch_or(x,y) atom_or(x,y)\n" - "#define atomic_fetch_xor(x,y) atom_xor(x,y)\n" - "#define atomic_fetch_and(x,y) atom_and(x,y)\n" - "#define atomic_fetch_min(x,y) atom_min(x,y)\n" - "#define atomic_fetch_max(x,y) atom_max(x,y)\n" - "#define atomic_flag_test_and_set(x) atomic_exchange(x,1)\n" - "#define atomic_flag_clear(x) atomic_store(x,0)\n" - "\n"; - } - if(!LocalMemory() && DeclaredInProgram()) - { - // additional atomic variable for results copying (last thread will do this) - header += "__global volatile atomic_uint finishedThreads = ATOMIC_VAR_INIT(0);\n"; - // atomic variables declared in 
program scope - test data - std::stringstream ss; - ss << maxNumDestItems; - header += std::string("__global volatile ")+aTypeName+" destMemory["+ss.str()+"] = {\n"; - ss.str(""); - ss << _startValue; - for(cl_uint i = 0; i < maxNumDestItems; i++) - { - if(aTypeName == "atomic_flag") - header += " ATOMIC_FLAG_INIT"; - else - header += " ATOMIC_VAR_INIT("+ss.str()+")"; - if(i+1 < maxNumDestItems) - header += ","; - header += "\n"; - } - header+= - "};\n" - "\n"; - } - return header; + // Create the program header + std::string header; + std::string aTypeName = DataType().AtomicTypeName(); + std::string cTypeName = DataType().RegularTypeName(); + std::string argListForKernel; + std::string argListForFunction; + std::string argListNoTypes; + std::string functionPrototype; + std::string addressSpace = LocalMemory() ? "__local " : "__global "; + + if (gOldAPI) + { + header += std::string("#define ") + aTypeName + " " + cTypeName + + "\n" + "#define atomic_store(x,y) (*(x) " + "= y)\n" + "#define atomic_load(x) " + "(*(x))\n" + "#define ATOMIC_VAR_INIT(x) (x)\n" + "#define ATOMIC_FLAG_INIT 0\n" + "#define atomic_init(x,y) " + "atomic_store(x,y)\n"; + if (aTypeName == "atomic_float") + header += "#define atomic_exchange(x,y) " + " atomic_xchg(x,y)\n"; + else if (aTypeName == "atomic_double") + header += "double atomic_exchange(volatile " + addressSpace + + "atomic_double *x, double y)\n" + "{\n" + " long tmp = *(long*)&y, res;\n" + " volatile " + + addressSpace + "long *tmpA = (volatile " + addressSpace + + "long)x;\n" + " res = atom_xchg(tmpA,tmp);\n" + " return *(double*)&res;\n" + "}\n"; + else + header += "#define atomic_exchange(x,y) " + " atom_xchg(x,y)\n"; + if (aTypeName != "atomic_float" && aTypeName != "atomic_double") + header += "bool atomic_compare_exchange_strong(volatile " + + addressSpace + " " + aTypeName + " *a, " + cTypeName + + " *expected, " + cTypeName + + " desired)\n" + "{\n" + " " + + cTypeName + + " old = atom_cmpxchg(a, *expected, desired);\n" + 
" if(old == *expected)\n" + " return true;\n" + " *expected = old;\n" + " return false;\n" + "}\n" + "#define atomic_compare_exchange_weak " + "atomic_compare_exchange_strong\n"; + header += "#define atomic_fetch_add(x,y) " + "atom_add(x,y)\n" + "#define atomic_fetch_sub(x,y) " + "atom_sub(x,y)\n" + "#define atomic_fetch_or(x,y) " + "atom_or(x,y)\n" + "#define atomic_fetch_xor(x,y) " + "atom_xor(x,y)\n" + "#define atomic_fetch_and(x,y) " + "atom_and(x,y)\n" + "#define atomic_fetch_min(x,y) " + "atom_min(x,y)\n" + "#define atomic_fetch_max(x,y) " + "atom_max(x,y)\n" + "#define atomic_flag_test_and_set(x) " + "atomic_exchange(x,1)\n" + "#define atomic_flag_clear(x) " + "atomic_store(x,0)\n" + "\n"; + } + if (!LocalMemory() && DeclaredInProgram()) + { + // additional atomic variable for results copying (last thread will do + // this) + header += "__global volatile atomic_uint finishedThreads = " + "ATOMIC_VAR_INIT(0);\n"; + // atomic variables declared in program scope - test data + std::stringstream ss; + ss << maxNumDestItems; + header += std::string("__global volatile ") + aTypeName + " destMemory[" + + ss.str() + "] = {\n"; + ss.str(""); + ss << _startValue; + for (cl_uint i = 0; i < maxNumDestItems; i++) + { + if (aTypeName == "atomic_flag") + header += " ATOMIC_FLAG_INIT"; + else + header += " ATOMIC_VAR_INIT(" + ss.str() + ")"; + if (i + 1 < maxNumDestItems) header += ","; + header += "\n"; + } + header += "};\n" + "\n"; + } + return header; } -template +template std::string CBasicTest::FunctionCode() { - if(!UsedInFunction()) - return ""; - std::string addressSpace = LocalMemory() ? 
"__local " : "__global "; - std::string code = "void test_atomic_function(uint tid, uint threadCount, uint numDestItems, volatile "; - if(!GenericAddrSpace()) - code += addressSpace; - code += std::string(DataType().AtomicTypeName())+" *destMemory, __global "+DataType().RegularTypeName()+ - " *oldValues"; - if(LocalRefValues()) - code += std::string(", __local ")+DataType().RegularTypeName()+" *localValues"; - code += ")\n" - "{\n"; - code += ProgramCore(); - code += "}\n" - "\n"; - return code; + if (!UsedInFunction()) return ""; + std::string addressSpace = LocalMemory() ? "__local " : "__global "; + std::string code = "void test_atomic_function(uint tid, uint threadCount, " + "uint numDestItems, volatile "; + if (!GenericAddrSpace()) code += addressSpace; + code += std::string(DataType().AtomicTypeName()) + " *destMemory, __global " + + DataType().RegularTypeName() + " *oldValues"; + if (LocalRefValues()) + code += std::string(", __local ") + DataType().RegularTypeName() + + " *localValues"; + code += ")\n" + "{\n"; + code += ProgramCore(); + code += "}\n" + "\n"; + return code; } -template -std::string CBasicTest::KernelCode(cl_uint maxNumDestItems) +template +std::string +CBasicTest::KernelCode(cl_uint maxNumDestItems) { - std::string aTypeName = DataType().AtomicTypeName(); - std::string cTypeName = DataType().RegularTypeName(); - std::string addressSpace = LocalMemory() ? "__local " : "__global "; - std::string code = "__kernel void test_atomic_kernel(uint threadCount, uint numDestItems, "; - - // prepare list of arguments for kernel - if(LocalMemory()) - { - code += std::string("__global ")+cTypeName+" *finalDest, __global "+cTypeName+" *oldValues," - " volatile "+addressSpace+aTypeName+" *"+(DeclaredInProgram() ? "notUsed" : "")+"destMemory"; - } - else - { - code += "volatile "+addressSpace+(DeclaredInProgram() ? 
(cTypeName+" *finalDest") : (aTypeName+" *destMemory"))+ - ", __global "+cTypeName+" *oldValues"; - } - if(LocalRefValues()) - code += std::string(", __local ")+cTypeName+" *localValues"; - code += ")\n" - "{\n"; - if(LocalMemory() && DeclaredInProgram()) - { - // local atomics declared in kernel scope - std::stringstream ss; - ss << maxNumDestItems; - code += std::string(" __local volatile ")+aTypeName+" destMemory["+ss.str()+"];\n"; - } - code += " uint tid = get_global_id(0);\n" - "\n"; - if(LocalMemory()) - { - // memory_order_relaxed is sufficient for these initialization operations - // as the barrier below will act as a fence, providing an order to the - // operations. memory_scope_work_group is sufficient as local memory is - // only visible within the work-group. - code += R"( + std::string aTypeName = DataType().AtomicTypeName(); + std::string cTypeName = DataType().RegularTypeName(); + std::string addressSpace = LocalMemory() ? "__local " : "__global "; + std::string code = "__kernel void test_atomic_kernel(uint threadCount, " + "uint numDestItems, "; + + // prepare list of arguments for kernel + if (LocalMemory()) + { + code += std::string("__global ") + cTypeName + " *finalDest, __global " + + cTypeName + + " *oldValues," + " volatile " + + addressSpace + aTypeName + " *" + + (DeclaredInProgram() ? "notUsed" : "") + "destMemory"; + } + else + { + code += "volatile " + addressSpace + + (DeclaredInProgram() ? 
(cTypeName + " *finalDest") + : (aTypeName + " *destMemory")) + + ", __global " + cTypeName + " *oldValues"; + } + if (LocalRefValues()) + code += std::string(", __local ") + cTypeName + " *localValues"; + code += ")\n" + "{\n"; + if (LocalMemory() && DeclaredInProgram()) + { + // local atomics declared in kernel scope + std::stringstream ss; + ss << maxNumDestItems; + code += std::string(" __local volatile ") + aTypeName + " destMemory[" + + ss.str() + "];\n"; + } + code += " uint tid = get_global_id(0);\n" + "\n"; + if (LocalMemory()) + { + // memory_order_relaxed is sufficient for these initialization + // operations as the barrier below will act as a fence, providing an + // order to the operations. memory_scope_work_group is sufficient as + // local memory is only visible within the work-group. + code += R"( // initialize atomics not reachable from host (first thread // is doing this, other threads are waiting on barrier) if(get_local_id(0) == 0) for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++) {)"; - if (aTypeName == "atomic_flag") - { - code += R"( + if (aTypeName == "atomic_flag") + { + code += R"( if(finalDest[dstItemIdx]) atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, memory_order_relaxed, @@ -823,512 +976,565 @@ std::string CBasicTest::KernelCode(cl_uint maxNumD atomic_flag_clear_explicit(destMemory+dstItemIdx, memory_order_relaxed, memory_scope_work_group);)"; - } - else - { - code += R"( + } + else + { + code += R"( atomic_store_explicit(destMemory+dstItemIdx, finalDest[dstItemIdx], memory_order_relaxed, memory_scope_work_group);)"; + } + code += " }\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + "\n"; } - code += - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - "\n"; - } - if (LocalRefValues()) - { - code += - " // Copy input reference values into local memory\n"; - if (NumNonAtomicVariablesPerThread() == 1) - code += " localValues[get_local_id(0)] = oldValues[tid];\n"; - else + if (LocalRefValues()) { - std::stringstream ss; - ss 
<< NumNonAtomicVariablesPerThread(); - code += - " for(uint rfId = 0; rfId < " + ss.str() + "; rfId++)\n" - " localValues[get_local_id(0)*" + ss.str() + "+rfId] = oldValues[tid*" + ss.str() + "+rfId];\n"; - } - code += - " barrier(CLK_LOCAL_MEM_FENCE);\n" - "\n"; - } - if (UsedInFunction()) - code += std::string(" test_atomic_function(tid, threadCount, numDestItems, destMemory, oldValues")+ - (LocalRefValues() ? ", localValues" : "")+");\n"; - else - code += ProgramCore(); - code += "\n"; - if (LocalRefValues()) - { - code += - " // Copy local reference values into output array\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n"; - if (NumNonAtomicVariablesPerThread() == 1) - code += " oldValues[tid] = localValues[get_local_id(0)];\n"; + code += " // Copy input reference values into local memory\n"; + if (NumNonAtomicVariablesPerThread() == 1) + code += " localValues[get_local_id(0)] = oldValues[tid];\n"; + else + { + std::stringstream ss; + ss << NumNonAtomicVariablesPerThread(); + code += " for(uint rfId = 0; rfId < " + ss.str() + + "; rfId++)\n" + " localValues[get_local_id(0)*" + + ss.str() + "+rfId] = oldValues[tid*" + ss.str() + "+rfId];\n"; + } + code += " barrier(CLK_LOCAL_MEM_FENCE);\n" + "\n"; + } + if (UsedInFunction()) + code += std::string(" test_atomic_function(tid, threadCount, " + "numDestItems, destMemory, oldValues") + + (LocalRefValues() ? 
", localValues" : "") + ");\n"; else + code += ProgramCore(); + code += "\n"; + if (LocalRefValues()) { - std::stringstream ss; - ss << NumNonAtomicVariablesPerThread(); - code += - " for(uint rfId = 0; rfId < " + ss.str() + "; rfId++)\n" - " oldValues[tid*" + ss.str() + "+rfId] = localValues[get_local_id(0)*" + ss.str() + "+rfId];\n"; + code += " // Copy local reference values into output array\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n"; + if (NumNonAtomicVariablesPerThread() == 1) + code += " oldValues[tid] = localValues[get_local_id(0)];\n"; + else + { + std::stringstream ss; + ss << NumNonAtomicVariablesPerThread(); + code += " for(uint rfId = 0; rfId < " + ss.str() + + "; rfId++)\n" + " oldValues[tid*" + + ss.str() + "+rfId] = localValues[get_local_id(0)*" + ss.str() + + "+rfId];\n"; + } + code += "\n"; } - code += "\n"; - } - if(LocalMemory() || DeclaredInProgram()) - { - code += " // Copy final values to host reachable buffer\n"; - if(LocalMemory()) - code += - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " if(get_local_id(0) == 0) // first thread in workgroup\n"; - else - // global atomics declared in program scope - code += R"( + if (LocalMemory() || DeclaredInProgram()) + { + code += " // Copy final values to host reachable buffer\n"; + if (LocalMemory()) + code += " barrier(CLK_LOCAL_MEM_FENCE);\n" + " if(get_local_id(0) == 0) // first thread in workgroup\n"; + else + // global atomics declared in program scope + code += R"( if(atomic_fetch_add_explicit(&finishedThreads, 1u, memory_order_relaxed, memory_scope_work_group) == get_global_size(0)-1) // last finished thread )"; - code += - " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++)\n"; - if(aTypeName == "atomic_flag") - { - code += R"( + code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; " + "dstItemIdx++)\n"; + if (aTypeName == "atomic_flag") + { + code += R"( finalDest[dstItemIdx] = atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, memory_order_relaxed, 
memory_scope_work_group);)"; - } - else - { - code += R"( + } + else + { + code += R"( finalDest[dstItemIdx] = atomic_load_explicit(destMemory+dstItemIdx, memory_order_relaxed, memory_scope_work_group);)"; + } } - } - code += "}\n" - "\n"; - return code; + code += "}\n" + "\n"; + return code; } template -int CBasicTest::ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) +int CBasicTest::ExecuteSingleTest( + cl_device_id deviceID, cl_context context, cl_command_queue queue) { - int error; - clProgramWrapper program; - clKernelWrapper kernel; - size_t threadNum[1]; - clMemWrapper streams[2]; - std::vector destItems; - HostAtomicType *svmAtomicBuffer = 0; - std::vector refValues, startRefValues; - HostDataType *svmDataBuffer = 0; - cl_uint deviceThreadCount, hostThreadCount, threadCount; - size_t groupSize = 0; - std::string programSource; - const char *programLine; - MTdata d; - size_t typeSize = DataType().Size(deviceID); - - deviceThreadCount = _maxDeviceThreads; - hostThreadCount = MaxHostThreads(); - threadCount = deviceThreadCount+hostThreadCount; - - //log_info("\t%s %s%s...\n", local ? 
"local" : "global", DataType().AtomicTypeName(), memoryOrderScope.c_str()); - log_info("\t%s...\n", SingleTestName().c_str()); - - if(!LocalMemory() && DeclaredInProgram() && gNoGlobalVariables) // no support for program scope global variables - { - log_info("\t\tTest disabled\n"); - return 0; - } - if(UsedInFunction() && GenericAddrSpace() && gNoGenericAddressSpace) - { - log_info("\t\tTest disabled\n"); - return 0; - } - - // set up work sizes based on device capabilities and test configuration - error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(groupSize), &groupSize, NULL); - test_error(error, "Unable to obtain max work group size for device"); - CurrentGroupSize((cl_uint)groupSize); - if(CurrentGroupSize() > deviceThreadCount) - CurrentGroupSize(deviceThreadCount); - if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) - deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount); - threadCount = deviceThreadCount+hostThreadCount; - - // If we're given a num_results function, we need to determine how many result objects we need. 
- // This is the first assessment for current maximum number of threads (exact thread count is not known here) - // - needed for program source code generation (arrays of atomics declared in program) - cl_uint numDestItems = NumResults(threadCount, deviceID); - - if(deviceThreadCount > 0) - { - // This loop iteratively reduces the workgroup size by 2 and then - // re-generates the kernel with the reduced - // workgroup size until we find a size which is admissible for the kernel - // being run or reduce the wg size - // to the trivial case of 1 (which was separately verified to be accurate - // for the kernel being run) - - while ((CurrentGroupSize() > 1)) - { - // Re-generate the kernel code with the current group size - if (kernel) clReleaseKernel(kernel); - if (program) clReleaseProgram(program); - programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems) - + FunctionCode() + KernelCode(numDestItems); - programLine = programSource.c_str(); - if (create_single_kernel_helper_with_build_options( - context, &program, &kernel, 1, &programLine, - "test_atomic_kernel", gOldAPI ? 
"" : nullptr)) - { - return -1; - } - // Get work group size for the new kernel - error = clGetKernelWorkGroupInfo(kernel, deviceID, - CL_KERNEL_WORK_GROUP_SIZE, - sizeof(groupSize), &groupSize, NULL); - test_error(error, - "Unable to obtain max work group size for device and " - "kernel combo"); - - if (LocalMemory()) - { - cl_ulong usedLocalMemory; - cl_ulong totalLocalMemory; - cl_uint maxWorkGroupSize; - - error = clGetKernelWorkGroupInfo( - kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, - sizeof(usedLocalMemory), &usedLocalMemory, NULL); - test_error(error, "clGetKernelWorkGroupInfo failed"); - - error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, - sizeof(totalLocalMemory), - &totalLocalMemory, NULL); - test_error(error, "clGetDeviceInfo failed"); - - // We know that each work-group is going to use typeSize * - // deviceThreadCount bytes of local memory - // so pick the maximum value for deviceThreadCount that uses all - // the local memory. - maxWorkGroupSize = - ((totalLocalMemory - usedLocalMemory) / typeSize); - - if (maxWorkGroupSize < groupSize) groupSize = maxWorkGroupSize; - } - if (CurrentGroupSize() <= groupSize) - break; - else - CurrentGroupSize(CurrentGroupSize() / 2); - } - if(CurrentGroupSize() > deviceThreadCount) - CurrentGroupSize(deviceThreadCount); - if(CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) - deviceThreadCount = CurrentGroupSize()*CurrentGroupNum(deviceThreadCount); - threadCount = deviceThreadCount+hostThreadCount; - } - if (gDebug) - { - log_info("Program source:\n"); - log_info("%s\n", programLine); - } - if(deviceThreadCount > 0) - log_info("\t\t(thread count %u, group size %u)\n", deviceThreadCount, CurrentGroupSize()); - if(hostThreadCount > 0) - log_info("\t\t(host threads %u)\n", hostThreadCount); - - refValues.resize(threadCount*NumNonAtomicVariablesPerThread()); - - // Generate ref data if we have a ref generator provided - d = init_genrand(gRandomSeed); - 
startRefValues.resize(threadCount*NumNonAtomicVariablesPerThread()); - if(GenerateRefs(threadCount, &startRefValues[0], d)) - { - //copy ref values for host threads - memcpy(&refValues[0], &startRefValues[0], sizeof(HostDataType)*threadCount*NumNonAtomicVariablesPerThread()); - } - else - { - startRefValues.resize(0); - } - free_mtdata(d); - d = NULL; - - // If we're given a num_results function, we need to determine how many result objects we need. If - // we don't have it, we assume it's just 1 - // This is final value (exact thread count is known in this place) - numDestItems = NumResults(threadCount, deviceID); - - destItems.resize(numDestItems); - for(cl_uint i = 0; i < numDestItems; i++) - destItems[i] = _startValue; - - // Create main buffer with atomic variables (array size dependent on particular test) - if(UseSVM()) - { - if(gUseHostPtr) - svmAtomicBuffer = (HostAtomicType*)malloc(typeSize * numDestItems); - else - svmAtomicBuffer = (HostAtomicType*)clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, typeSize * numDestItems, 0); - if(!svmAtomicBuffer) - { - log_error("ERROR: clSVMAlloc failed!\n"); - return -1; - } - memcpy(svmAtomicBuffer, &destItems[0], typeSize * numDestItems); - streams[0] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, - typeSize * numDestItems, svmAtomicBuffer, NULL); - } - else - { - streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - typeSize * numDestItems, &destItems[0], NULL); - } - if (!streams[0]) - { - log_error("ERROR: Creating output array failed!\n"); - return -1; - } - // Create buffer for per-thread input/output data - if(UseSVM()) - { - if(gUseHostPtr) - svmDataBuffer = (HostDataType*)malloc(typeSize*threadCount*NumNonAtomicVariablesPerThread()); - else - svmDataBuffer = (HostDataType*)clSVMAlloc(context, CL_MEM_SVM_FINE_GRAIN_BUFFER | (SVMDataBufferAllSVMConsistent() ? 
CL_MEM_SVM_ATOMICS : 0), typeSize*threadCount*NumNonAtomicVariablesPerThread(), 0); - if(!svmDataBuffer) - { - log_error("ERROR: clSVMAlloc failed!\n"); - return -1; - } - if(startRefValues.size()) - memcpy(svmDataBuffer, &startRefValues[0], typeSize*threadCount*NumNonAtomicVariablesPerThread()); - streams[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, - typeSize * threadCount - * NumNonAtomicVariablesPerThread(), - svmDataBuffer, NULL); - } - else - { - streams[1] = clCreateBuffer( - context, - ((startRefValues.size() ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)), - typeSize * threadCount * NumNonAtomicVariablesPerThread(), - startRefValues.size() ? &startRefValues[0] : 0, NULL); - } - if (!streams[1]) - { - log_error("ERROR: Creating reference array failed!\n"); - return -1; - } - if(deviceThreadCount > 0) - { - cl_uint argInd = 0; - /* Set the arguments */ - error = clSetKernelArg(kernel, argInd++, sizeof(threadCount), &threadCount); - test_error(error, "Unable to set kernel argument"); - error = clSetKernelArg(kernel, argInd++, sizeof(numDestItems), &numDestItems); - test_error(error, "Unable to set indexed kernel argument"); - error = clSetKernelArg(kernel, argInd++, sizeof(streams[0]), &streams[0]); - test_error(error, "Unable to set indexed kernel arguments"); - error = clSetKernelArg(kernel, argInd++, sizeof(streams[1]), &streams[1]); - test_error(error, "Unable to set indexed kernel arguments"); - if(LocalMemory()) - { - error = clSetKernelArg(kernel, argInd++, typeSize * numDestItems, NULL); - test_error(error, "Unable to set indexed local kernel argument"); - } - if(LocalRefValues()) - { - error = clSetKernelArg(kernel, argInd++, LocalRefValues() ? 
typeSize*CurrentGroupSize()*NumNonAtomicVariablesPerThread() : 1, NULL); - test_error(error, "Unable to set indexed kernel argument"); - } - } - /* Configure host threads */ - std::vector hostThreadContexts(hostThreadCount); - for(unsigned int t = 0; t < hostThreadCount; t++) - { - hostThreadContexts[t].test = this; - hostThreadContexts[t].tid = deviceThreadCount+t; - hostThreadContexts[t].threadCount = threadCount; - hostThreadContexts[t].destMemory = UseSVM() ? svmAtomicBuffer : &destItems[0]; - hostThreadContexts[t].oldValues = UseSVM() ? svmDataBuffer : &refValues[0]; - } - - if(deviceThreadCount > 0) - { - /* Run the kernel */ - threadNum[0] = deviceThreadCount; - groupSize = CurrentGroupSize(); - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, &groupSize, 0, NULL, NULL); - test_error(error, "Unable to execute test kernel"); - /* start device threads */ - error = clFlush(queue); - test_error(error, "clFlush failed"); - } - - /* Start host threads and wait for finish */ - if(hostThreadCount > 0) - ThreadPool_Do(HostThreadFunction, hostThreadCount, &hostThreadContexts[0]); - - if(UseSVM()) - { - error = clFinish(queue); - test_error(error, "clFinish failed"); - memcpy(&destItems[0], svmAtomicBuffer, typeSize*numDestItems); - memcpy(&refValues[0], svmDataBuffer, typeSize*threadCount*NumNonAtomicVariablesPerThread()); - } - else - { - if(deviceThreadCount > 0) - { - error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, typeSize * numDestItems, &destItems[0], 0, NULL, NULL); - test_error(error, "Unable to read result value!"); - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize * deviceThreadCount*NumNonAtomicVariablesPerThread(), &refValues[0], 0, NULL, NULL); - test_error(error, "Unable to read reference values!"); - } - } - bool dataVerified = false; - // If we have an expectedFn, then we need to generate a final value to compare against. 
If we don't - // have one, it's because we're comparing ref values only - for(cl_uint i = 0; i < numDestItems; i++) - { - HostDataType expected; - - if(!ExpectedValue(expected, threadCount, startRefValues.size() ? &startRefValues[0] : 0, i)) - break; // no expected value function provided - - if(expected != destItems[i]) - { - std::stringstream logLine; - logLine << "ERROR: Result " << i << " from kernel does not validate! (should be " << expected << ", was " << destItems[i] << ")\n"; - log_error("%s", logLine.str().c_str()); - for(i = 0; i < threadCount; i++) - { - logLine.str(""); - logLine << " --- " << i << " - "; - if(startRefValues.size()) - logLine << startRefValues[i] << " -> " << refValues[i]; - else - logLine << refValues[i]; - logLine << " --- "; - if(i < numDestItems) - logLine << destItems[i]; - logLine << "\n"; - log_info("%s", logLine.str().c_str()); - } - if(!gDebug) - { - log_info("Program source:\n"); - log_info("%s\n", programLine); - } - return -1; - } - dataVerified = true; - } - - bool dataCorrect = false; - /* Use the verify function (if provided) to also check the results */ - if(VerifyRefs(dataCorrect, threadCount, &refValues[0], &destItems[0])) - { - if(!dataCorrect) - { - log_error("ERROR: Reference values did not validate!\n"); - std::stringstream logLine; - for(cl_uint i = 0; i < threadCount; i++) - for (cl_uint j = 0; j < NumNonAtomicVariablesPerThread(); j++) - { - logLine.str(""); - logLine << " --- " << i << " - " << refValues[i*NumNonAtomicVariablesPerThread()+j] << " --- "; - if(j == 0 && i < numDestItems) - logLine << destItems[i]; - logLine << "\n"; - log_info("%s", logLine.str().c_str()); - } - if(!gDebug) - { + int error; + clProgramWrapper program; + clKernelWrapper kernel; + size_t threadNum[1]; + clMemWrapper streams[2]; + std::vector destItems; + HostAtomicType *svmAtomicBuffer = 0; + std::vector refValues, startRefValues; + HostDataType *svmDataBuffer = 0; + cl_uint deviceThreadCount, hostThreadCount, threadCount; + 
size_t groupSize = 0; + std::string programSource; + const char *programLine; + MTdata d; + size_t typeSize = DataType().Size(deviceID); + + deviceThreadCount = _maxDeviceThreads; + hostThreadCount = MaxHostThreads(); + threadCount = deviceThreadCount + hostThreadCount; + + // log_info("\t%s %s%s...\n", local ? "local" : "global", + // DataType().AtomicTypeName(), memoryOrderScope.c_str()); + log_info("\t%s...\n", SingleTestName().c_str()); + + if (!LocalMemory() && DeclaredInProgram() + && gNoGlobalVariables) // no support for program scope global variables + { + log_info("\t\tTest disabled\n"); + return 0; + } + if (UsedInFunction() && GenericAddrSpace() && gNoGenericAddressSpace) + { + log_info("\t\tTest disabled\n"); + return 0; + } + + // set up work sizes based on device capabilities and test configuration + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(groupSize), &groupSize, NULL); + test_error(error, "Unable to obtain max work group size for device"); + CurrentGroupSize((cl_uint)groupSize); + if (CurrentGroupSize() > deviceThreadCount) + CurrentGroupSize(deviceThreadCount); + if (CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) + deviceThreadCount = + CurrentGroupSize() * CurrentGroupNum(deviceThreadCount); + threadCount = deviceThreadCount + hostThreadCount; + + // If we're given a num_results function, we need to determine how many + // result objects we need. 
This is the first assessment for current maximum + // number of threads (exact thread count is not known here) + // - needed for program source code generation (arrays of atomics declared + // in program) + cl_uint numDestItems = NumResults(threadCount, deviceID); + + if (deviceThreadCount > 0) + { + // This loop iteratively reduces the workgroup size by 2 and then + // re-generates the kernel with the reduced + // workgroup size until we find a size which is admissible for the + // kernel being run or reduce the wg size to the trivial case of 1 + // (which was separately verified to be accurate for the kernel being + // run) + + while ((CurrentGroupSize() > 1)) + { + // Re-generate the kernel code with the current group size + if (kernel) clReleaseKernel(kernel); + if (program) clReleaseProgram(program); + programSource = PragmaHeader(deviceID) + ProgramHeader(numDestItems) + + FunctionCode() + KernelCode(numDestItems); + programLine = programSource.c_str(); + if (create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &programLine, + "test_atomic_kernel", gOldAPI ? 
"" : nullptr)) + { + return -1; + } + // Get work group size for the new kernel + error = clGetKernelWorkGroupInfo( + kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(groupSize), + &groupSize, NULL); + test_error(error, + "Unable to obtain max work group size for device and " + "kernel combo"); + + if (LocalMemory()) + { + cl_ulong usedLocalMemory; + cl_ulong totalLocalMemory; + cl_uint maxWorkGroupSize; + + error = clGetKernelWorkGroupInfo( + kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(usedLocalMemory), &usedLocalMemory, NULL); + test_error(error, "clGetKernelWorkGroupInfo failed"); + + error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(totalLocalMemory), + &totalLocalMemory, NULL); + test_error(error, "clGetDeviceInfo failed"); + + // We know that each work-group is going to use typeSize * + // deviceThreadCount bytes of local memory + // so pick the maximum value for deviceThreadCount that uses all + // the local memory. + maxWorkGroupSize = + ((totalLocalMemory - usedLocalMemory) / typeSize); + + if (maxWorkGroupSize < groupSize) groupSize = maxWorkGroupSize; + } + if (CurrentGroupSize() <= groupSize) + break; + else + CurrentGroupSize(CurrentGroupSize() / 2); + } + if (CurrentGroupSize() > deviceThreadCount) + CurrentGroupSize(deviceThreadCount); + if (CurrentGroupNum(deviceThreadCount) == 1 || gOldAPI) + deviceThreadCount = + CurrentGroupSize() * CurrentGroupNum(deviceThreadCount); + threadCount = deviceThreadCount + hostThreadCount; + } + if (gDebug) + { log_info("Program source:\n"); log_info("%s\n", programLine); - } - return -1; - } - } - else if(!dataVerified) - { - log_error("ERROR: Test doesn't check total or refs; no values are verified!\n"); - return -1; - } - - if(OldValueCheck() && - !(DeclaredInProgram() && !LocalMemory())) // don't test for programs scope global atomics - // 'old' value has been overwritten by previous clEnqueueNDRangeKernel - { - /* Re-write the starting value */ - for(size_t i = 0; i < 
numDestItems; i++) - destItems[i] = _startValue; - refValues[0] = 0; - if(deviceThreadCount > 0) - { - error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, typeSize * numDestItems, &destItems[0], 0, NULL, NULL); - test_error(error, "Unable to write starting values!"); - - /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */ - threadNum[0] = 1; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, threadNum, 0, NULL, NULL); - test_error(error, "Unable to execute test kernel"); - - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize, &refValues[0], 0, NULL, NULL); - test_error(error, "Unable to read reference values!"); + } + if (deviceThreadCount > 0) + log_info("\t\t(thread count %u, group size %u)\n", deviceThreadCount, + CurrentGroupSize()); + if (hostThreadCount > 0) + log_info("\t\t(host threads %u)\n", hostThreadCount); + + refValues.resize(threadCount * NumNonAtomicVariablesPerThread()); + + // Generate ref data if we have a ref generator provided + d = init_genrand(gRandomSeed); + startRefValues.resize(threadCount * NumNonAtomicVariablesPerThread()); + if (GenerateRefs(threadCount, &startRefValues[0], d)) + { + // copy ref values for host threads + memcpy(&refValues[0], &startRefValues[0], + sizeof(HostDataType) * threadCount + * NumNonAtomicVariablesPerThread()); } else { - /* Start host thread */ - HostFunction(0, 1, &destItems[0], &refValues[0]); + startRefValues.resize(0); } + free_mtdata(d); + d = NULL; + + // If we're given a num_results function, we need to determine how many + // result objects we need. 
If we don't have it, we assume it's just 1 This + // is final value (exact thread count is known in this place) + numDestItems = NumResults(threadCount, deviceID); - if(refValues[0] != _startValue)//destItems[0]) + destItems.resize(numDestItems); + for (cl_uint i = 0; i < numDestItems; i++) destItems[i] = _startValue; + + // Create main buffer with atomic variables (array size dependent on + // particular test) + if (UseSVM()) { - std::stringstream logLine; - logLine << "ERROR: atomic function operated correctly but did NOT return correct 'old' value " - " (should have been " << destItems[0] << ", returned " << refValues[0] << ")!\n"; - log_error("%s", logLine.str().c_str()); - if(!gDebug) - { - log_info("Program source:\n"); - log_info("%s\n", programLine); - } - return -1; - } - } - if(UseSVM()) - { - // the buffer object must first be released before the SVM buffer is freed - error = clReleaseMemObject(streams[0]); - streams[0] = 0; - test_error(error, "clReleaseMemObject failed"); - if(gUseHostPtr) - free(svmAtomicBuffer); + if (gUseHostPtr) + svmAtomicBuffer = (HostAtomicType *)malloc(typeSize * numDestItems); + else + svmAtomicBuffer = (HostAtomicType *)clSVMAlloc( + context, CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS, + typeSize * numDestItems, 0); + if (!svmAtomicBuffer) + { + log_error("ERROR: clSVMAlloc failed!\n"); + return -1; + } + memcpy(svmAtomicBuffer, &destItems[0], typeSize * numDestItems); + streams[0] = + clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + typeSize * numDestItems, svmAtomicBuffer, NULL); + } + else + { + streams[0] = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + typeSize * numDestItems, &destItems[0], NULL); + } + if (!streams[0]) + { + log_error("ERROR: Creating output array failed!\n"); + return -1; + } + // Create buffer for per-thread input/output data + if (UseSVM()) + { + if (gUseHostPtr) + svmDataBuffer = (HostDataType *)malloc( + typeSize * threadCount * NumNonAtomicVariablesPerThread()); + else + svmDataBuffer = 
(HostDataType *)clSVMAlloc( + context, + CL_MEM_SVM_FINE_GRAIN_BUFFER + | (SVMDataBufferAllSVMConsistent() ? CL_MEM_SVM_ATOMICS + : 0), + typeSize * threadCount * NumNonAtomicVariablesPerThread(), 0); + if (!svmDataBuffer) + { + log_error("ERROR: clSVMAlloc failed!\n"); + return -1; + } + if (startRefValues.size()) + memcpy(svmDataBuffer, &startRefValues[0], + typeSize * threadCount * NumNonAtomicVariablesPerThread()); + streams[1] = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + typeSize * threadCount + * NumNonAtomicVariablesPerThread(), + svmDataBuffer, NULL); + } else - clSVMFree(context, svmAtomicBuffer); - error = clReleaseMemObject(streams[1]); - streams[1] = 0; - test_error(error, "clReleaseMemObject failed"); - if(gUseHostPtr) - free(svmDataBuffer); + { + streams[1] = clCreateBuffer( + context, + ((startRefValues.size() ? CL_MEM_COPY_HOST_PTR + : CL_MEM_READ_WRITE)), + typeSize * threadCount * NumNonAtomicVariablesPerThread(), + startRefValues.size() ? &startRefValues[0] : 0, NULL); + } + if (!streams[1]) + { + log_error("ERROR: Creating reference array failed!\n"); + return -1; + } + if (deviceThreadCount > 0) + { + cl_uint argInd = 0; + /* Set the arguments */ + error = + clSetKernelArg(kernel, argInd++, sizeof(threadCount), &threadCount); + test_error(error, "Unable to set kernel argument"); + error = clSetKernelArg(kernel, argInd++, sizeof(numDestItems), + &numDestItems); + test_error(error, "Unable to set indexed kernel argument"); + error = + clSetKernelArg(kernel, argInd++, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = + clSetKernelArg(kernel, argInd++, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); + if (LocalMemory()) + { + error = + clSetKernelArg(kernel, argInd++, typeSize * numDestItems, NULL); + test_error(error, "Unable to set indexed local kernel argument"); + } + if (LocalRefValues()) + { + error = + clSetKernelArg(kernel, 
argInd++, + LocalRefValues() ? typeSize * CurrentGroupSize() + * NumNonAtomicVariablesPerThread() + : 1, + NULL); + test_error(error, "Unable to set indexed kernel argument"); + } + } + /* Configure host threads */ + std::vector hostThreadContexts(hostThreadCount); + for (unsigned int t = 0; t < hostThreadCount; t++) + { + hostThreadContexts[t].test = this; + hostThreadContexts[t].tid = deviceThreadCount + t; + hostThreadContexts[t].threadCount = threadCount; + hostThreadContexts[t].destMemory = + UseSVM() ? svmAtomicBuffer : &destItems[0]; + hostThreadContexts[t].oldValues = + UseSVM() ? svmDataBuffer : &refValues[0]; + } + + if (deviceThreadCount > 0) + { + /* Run the kernel */ + threadNum[0] = deviceThreadCount; + groupSize = CurrentGroupSize(); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, + &groupSize, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + /* start device threads */ + error = clFlush(queue); + test_error(error, "clFlush failed"); + } + + /* Start host threads and wait for finish */ + if (hostThreadCount > 0) + ThreadPool_Do(HostThreadFunction, hostThreadCount, + &hostThreadContexts[0]); + + if (UseSVM()) + { + error = clFinish(queue); + test_error(error, "clFinish failed"); + memcpy(&destItems[0], svmAtomicBuffer, typeSize * numDestItems); + memcpy(&refValues[0], svmDataBuffer, + typeSize * threadCount * NumNonAtomicVariablesPerThread()); + } else - clSVMFree(context, svmDataBuffer); - } - _passCount++; - return 0; + { + if (deviceThreadCount > 0) + { + error = clEnqueueReadBuffer(queue, streams[0], CL_TRUE, 0, + typeSize * numDestItems, &destItems[0], + 0, NULL, NULL); + test_error(error, "Unable to read result value!"); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + typeSize * deviceThreadCount + * NumNonAtomicVariablesPerThread(), + &refValues[0], 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); + } + } + bool dataVerified = false; + // If we have an expectedFn, 
then we need to generate a final value to + // compare against. If we don't have one, it's because we're comparing ref + // values only + for (cl_uint i = 0; i < numDestItems; i++) + { + HostDataType expected; + + if (!ExpectedValue(expected, threadCount, + startRefValues.size() ? &startRefValues[0] : 0, i)) + break; // no expected value function provided + + if (expected != destItems[i]) + { + std::stringstream logLine; + logLine << "ERROR: Result " << i + << " from kernel does not validate! (should be " << expected + << ", was " << destItems[i] << ")\n"; + log_error("%s", logLine.str().c_str()); + for (i = 0; i < threadCount; i++) + { + logLine.str(""); + logLine << " --- " << i << " - "; + if (startRefValues.size()) + logLine << startRefValues[i] << " -> " << refValues[i]; + else + logLine << refValues[i]; + logLine << " --- "; + if (i < numDestItems) logLine << destItems[i]; + logLine << "\n"; + log_info("%s", logLine.str().c_str()); + } + if (!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + dataVerified = true; + } + + bool dataCorrect = false; + /* Use the verify function (if provided) to also check the results */ + if (VerifyRefs(dataCorrect, threadCount, &refValues[0], &destItems[0])) + { + if (!dataCorrect) + { + log_error("ERROR: Reference values did not validate!\n"); + std::stringstream logLine; + for (cl_uint i = 0; i < threadCount; i++) + for (cl_uint j = 0; j < NumNonAtomicVariablesPerThread(); j++) + { + logLine.str(""); + logLine + << " --- " << i << " - " + << refValues[i * NumNonAtomicVariablesPerThread() + j] + << " --- "; + if (j == 0 && i < numDestItems) logLine << destItems[i]; + logLine << "\n"; + log_info("%s", logLine.str().c_str()); + } + if (!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + } + else if (!dataVerified) + { + log_error("ERROR: Test doesn't check total or refs; no values are " + "verified!\n"); + return -1; + } + + if 
(OldValueCheck() + && !(DeclaredInProgram() + && !LocalMemory())) // don't test for programs scope global atomics + // 'old' value has been overwritten by previous + // clEnqueueNDRangeKernel + { + /* Re-write the starting value */ + for (size_t i = 0; i < numDestItems; i++) destItems[i] = _startValue; + refValues[0] = 0; + if (deviceThreadCount > 0) + { + error = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, + typeSize * numDestItems, &destItems[0], + 0, NULL, NULL); + test_error(error, "Unable to write starting values!"); + + /* Run the kernel once for a single thread, so we can verify that + * the returned value is the original one */ + threadNum[0] = 1; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threadNum, + threadNum, 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, typeSize, + &refValues[0], 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); + } + else + { + /* Start host thread */ + HostFunction(0, 1, &destItems[0], &refValues[0]); + } + + if (refValues[0] != _startValue) // destItems[0]) + { + std::stringstream logLine; + logLine << "ERROR: atomic function operated correctly but did NOT " + "return correct 'old' value " + " (should have been " + << destItems[0] << ", returned " << refValues[0] << ")!\n"; + log_error("%s", logLine.str().c_str()); + if (!gDebug) + { + log_info("Program source:\n"); + log_info("%s\n", programLine); + } + return -1; + } + } + if (UseSVM()) + { + // the buffer object must first be released before the SVM buffer is + // freed. 
The Wrapper Class method reset() will do that + streams[0].reset(); + if (gUseHostPtr) + free(svmAtomicBuffer); + else + clSVMFree(context, svmAtomicBuffer); + streams[1].reset(); + if (gUseHostPtr) + free(svmDataBuffer); + else + clSVMFree(context, svmDataBuffer); + } + _passCount++; + return 0; } #endif //_COMMON_H_ -- cgit v1.2.3 From 43e1397468053608134816cbcf6e8496e91cb227 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Wed, 18 Aug 2021 11:11:30 +0100 Subject: Fix kernel source for cl_khr_suggested_local_work_size (#1300) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use ASCII '-' instead of unicode '–' as subtration operator. Signed-off-by: Kévin Petit --- test_conformance/workgroups/test_wg_suggested_local_work_size.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp index 1dc1b39c..aa02391c 100644 --- a/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp +++ b/test_conformance/workgroups/test_wg_suggested_local_work_size.cpp @@ -42,9 +42,9 @@ const char* wg_scan_local_work_group_size = R"( { size_t linear_id; #if __OPENCL_VERSION__ < CL_VERSION_2_0 - linear_id = ((get_global_id(2) – get_global_offset(2)) * get_global_size(1) * get_global_size(0)) + - ((get_global_id(1) – get_global_offset(1)) * get_global_size(0)) + - (get_global_id(0) – get_global_offset(0)); + linear_id = ((get_global_id(2) - get_global_offset(2)) * get_global_size(1) * get_global_size(0)) + + ((get_global_id(1) - get_global_offset(1)) * get_global_size(0)) + + (get_global_id(0) - get_global_offset(0)); #else linear_id = get_global_linear_id(); #endif @@ -608,4 +608,4 @@ int test_work_group_suggested_local_size_3D(cl_device_id device, "global_work_offset passed\n"); return err; -} \ No newline at end of file +} -- cgit v1.2.3 From 
6c3c7e5266cddce9cfa466c02c14b43fee453110 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Thu, 19 Aug 2021 12:15:47 +0100 Subject: Remove unused definitions in CMakeLists.txt (#1302) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kévin Petit --- CMakeLists.txt | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d947ed1..a614649f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,12 +10,6 @@ set(CMAKE_C_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD_REQUIRED ON) -if(CMAKE_BUILD_TYPE STREQUAL "release") - set (BUILD_FLAVOR "release") -else(CMAKE_BUILD_TYPE STREQUAL "release") - set (BUILD_FLAVOR "debug") -endif(CMAKE_BUILD_TYPE STREQUAL "release") - add_definitions(-DCL_TARGET_OPENCL_VERSION=300) add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_2_APIS=1) add_definitions(-DCL_USE_DEPRECATED_OPENCL_2_1_APIS=1) @@ -29,14 +23,6 @@ if(USE_CL_EXPERIMENTAL) add_definitions(-DCL_EXPERIMENTAL) endif(USE_CL_EXPERIMENTAL) -# Support both VS2008 and VS2012. -set(BUILD_DIR "$ENV{ADRENO_DRIVER}/build") -if(MSVC90) - set(VS_BUILD_DIR "${BUILD_DIR}/vs2008") -else(MSVC110) - set(VS_BUILD_DIR "${BUILD_DIR}/vs2012") -endif(MSVC90) - #----------------------------------------------------------- # Default Configurable Test Set #----------------------------------------------------------- @@ -164,38 +150,5 @@ include_directories(${CLConform_SOURCE_DIR}/test_common/harness ${CLConform_SOURCE_DIR}/test_common/gl ${CLConform_SOURCE_DIR}/test_common) -if(CMAKE_BUILD_TYPE STREQUAL "release") - set (BUILD_FLAVOR "release") -elseif (CMAKE_BUILD_TYPE STREQUAL "debug") - set (BUILD_FLAVOR "debug") -endif(CMAKE_BUILD_TYPE STREQUAL "release") - - add_subdirectory(test_common) add_subdirectory(test_conformance) - -# Support both VS2008 and VS2012. 
-set (DLL_FILES "${VS_BUILD_DIR}/Debug/*.dll") -set (DST_DIR "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/Debug/") - -if (WIN32) - set (COPY "echo") - add_custom_target(COPY_DLL${CONFORMANCE_SUFFIX} ALL - COMMAND ${COPY} "${DLL_FILES}" "${DST_DIR}" - COMMENT "Copying dll files.. ") -else (WIN32) - set (COPY cp) - add_custom_target(COPY_DLL${CONFORMANCE_SUFFIX}) -endif(WIN32) - -set_property(TARGET COPY_DLL${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") - -if(WIN32) - add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} ALL - COMMAND ${COPY} ${DLL_FILES} ${DST_DIR} - COMMENT "Copying other files to output folder..." ) -else(WIN32) - add_custom_target( COPY_FILES${CONFORMANCE_SUFFIX} ) -endif(WIN32) - -set_property(TARGET COPY_FILES${CONFORMANCE_SUFFIX} PROPERTY FOLDER "CONFORMANCE${CONFORMANCE_SUFFIX}") -- cgit v1.2.3 From 070f8c0c0ed8786e410584efa3fefa47bdab02c6 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 25 Aug 2021 02:14:58 -0700 Subject: add tests for cl_khr_integer_dot_product (#1276) * cl_khr_integer_dot_product_tests * remove emulated codepaths * fix formatting * address code review comments * remove emulated codepaths again * address one more review comment --- test_common/harness/integer_ops_test_info.h | 91 +++++ test_conformance/integer_ops/CMakeLists.txt | 1 + test_conformance/integer_ops/main.cpp | 236 ++++++------- test_conformance/integer_ops/procs.h | 2 + .../integer_ops/test_integer_dot_product.cpp | 380 +++++++++++++++++++++ 5 files changed, 593 insertions(+), 117 deletions(-) create mode 100644 test_common/harness/integer_ops_test_info.h create mode 100644 test_conformance/integer_ops/test_integer_dot_product.cpp diff --git a/test_common/harness/integer_ops_test_info.h b/test_common/harness/integer_ops_test_info.h new file mode 100644 index 00000000..c25843dd --- /dev/null +++ b/test_common/harness/integer_ops_test_info.h @@ -0,0 +1,91 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef INTEGER_OPS_TEST_INFO_H +#define INTEGER_OPS_TEST_INFO_H + +#include "conversions.h" + +// TODO: expand usage to other tests. + +template struct TestInfo +{ +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kChar; + static constexpr const char* deviceTypeName = "char"; + static constexpr const char* deviceTypeNameSigned = "char"; + static constexpr const char* deviceTypeNameUnsigned = "uchar"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kUChar; + static constexpr const char* deviceTypeName = "uchar"; + static constexpr const char* deviceTypeNameSigned = "char"; + static constexpr const char* deviceTypeNameUnsigned = "uchar"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kShort; + static constexpr const char* deviceTypeName = "short"; + static constexpr const char* deviceTypeNameSigned = "short"; + static constexpr const char* deviceTypeNameUnsigned = "ushort"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kUShort; + static constexpr const char* deviceTypeName = "ushort"; + static constexpr const char* deviceTypeNameSigned = "short"; + static constexpr const char* deviceTypeNameUnsigned = "ushort"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kInt; + static constexpr const char* deviceTypeName = "int"; + static constexpr const char* 
deviceTypeNameSigned = "int"; + static constexpr const char* deviceTypeNameUnsigned = "uint"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kUInt; + static constexpr const char* deviceTypeName = "uint"; + static constexpr const char* deviceTypeNameSigned = "int"; + static constexpr const char* deviceTypeNameUnsigned = "uint"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kLong; + static constexpr const char* deviceTypeName = "long"; + static constexpr const char* deviceTypeNameSigned = "long"; + static constexpr const char* deviceTypeNameUnsigned = "ulong"; +}; +template <> struct TestInfo +{ + static const ExplicitType explicitType = kULong; + static constexpr const char* deviceTypeName = "ulong"; + static constexpr const char* deviceTypeNameSigned = "long"; + static constexpr const char* deviceTypeNameUnsigned = "ulong"; +}; + +template +static void fill_vector_with_random_data(std::vector& v) +{ + MTdataHolder d(gRandomSeed); + generate_random_data(TestInfo::explicitType, v.size(), d, v.data()); +} + +#endif /* INTEGER_OPS_TEST_INFO_H */ diff --git a/test_conformance/integer_ops/CMakeLists.txt b/test_conformance/integer_ops/CMakeLists.txt index a045ef81..5344eabc 100644 --- a/test_conformance/integer_ops/CMakeLists.txt +++ b/test_conformance/integer_ops/CMakeLists.txt @@ -11,6 +11,7 @@ set(${MODULE_NAME}_SOURCES test_unary_ops.cpp verification_and_generation_functions.cpp test_popcount.cpp + test_integer_dot_product.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/integer_ops/main.cpp b/test_conformance/integer_ops/main.cpp index 00e91661..e57cffd9 100644 --- a/test_conformance/integer_ops/main.cpp +++ b/test_conformance/integer_ops/main.cpp @@ -25,127 +25,129 @@ #endif test_definition test_list[] = { - ADD_TEST( integer_clz ), - ADD_TEST_VERSION( integer_ctz, Version(2, 0)), - ADD_TEST( integer_hadd ), - ADD_TEST( integer_rhadd ), - ADD_TEST( integer_mul_hi ), - ADD_TEST( 
integer_rotate ), - ADD_TEST( integer_clamp ), - ADD_TEST( integer_mad_sat ), - ADD_TEST( integer_mad_hi ), - ADD_TEST( integer_min ), - ADD_TEST( integer_max ), - ADD_TEST( integer_upsample ), - - ADD_TEST( integer_abs ), - ADD_TEST( integer_abs_diff ), - ADD_TEST( integer_add_sat ), - ADD_TEST( integer_sub_sat ), - - ADD_TEST( integer_addAssign ), - ADD_TEST( integer_subtractAssign ), - ADD_TEST( integer_multiplyAssign ), - ADD_TEST( integer_divideAssign ), - ADD_TEST( integer_moduloAssign ), - ADD_TEST( integer_andAssign ), - ADD_TEST( integer_orAssign ), - ADD_TEST( integer_exclusiveOrAssign ), - - ADD_TEST( unary_ops_increment ), - ADD_TEST( unary_ops_decrement ), - ADD_TEST( unary_ops_full ), - - ADD_TEST( integer_mul24 ), - ADD_TEST( integer_mad24 ), - - ADD_TEST( long_math ), - ADD_TEST( long_logic ), - ADD_TEST( long_shift ), - ADD_TEST( long_compare ), - - ADD_TEST( ulong_math ), - ADD_TEST( ulong_logic ), - ADD_TEST( ulong_shift ), - ADD_TEST( ulong_compare ), - - ADD_TEST( int_math ), - ADD_TEST( int_logic ), - ADD_TEST( int_shift ), - ADD_TEST( int_compare ), - - ADD_TEST( uint_math ), - ADD_TEST( uint_logic ), - ADD_TEST( uint_shift ), - ADD_TEST( uint_compare ), - - ADD_TEST( short_math ), - ADD_TEST( short_logic ), - ADD_TEST( short_shift ), - ADD_TEST( short_compare ), - - ADD_TEST( ushort_math ), - ADD_TEST( ushort_logic ), - ADD_TEST( ushort_shift ), - ADD_TEST( ushort_compare ), - - ADD_TEST( char_math ), - ADD_TEST( char_logic ), - ADD_TEST( char_shift ), - ADD_TEST( char_compare ), - - ADD_TEST( uchar_math ), - ADD_TEST( uchar_logic ), - ADD_TEST( uchar_shift ), - ADD_TEST( uchar_compare ), - - ADD_TEST( popcount ), + ADD_TEST(integer_clz), + ADD_TEST_VERSION(integer_ctz, Version(2, 0)), + ADD_TEST(integer_hadd), + ADD_TEST(integer_rhadd), + ADD_TEST(integer_mul_hi), + ADD_TEST(integer_rotate), + ADD_TEST(integer_clamp), + ADD_TEST(integer_mad_sat), + ADD_TEST(integer_mad_hi), + ADD_TEST(integer_min), + ADD_TEST(integer_max), + 
ADD_TEST(integer_upsample), + + ADD_TEST(integer_abs), + ADD_TEST(integer_abs_diff), + ADD_TEST(integer_add_sat), + ADD_TEST(integer_sub_sat), + + ADD_TEST(integer_addAssign), + ADD_TEST(integer_subtractAssign), + ADD_TEST(integer_multiplyAssign), + ADD_TEST(integer_divideAssign), + ADD_TEST(integer_moduloAssign), + ADD_TEST(integer_andAssign), + ADD_TEST(integer_orAssign), + ADD_TEST(integer_exclusiveOrAssign), + + ADD_TEST(unary_ops_increment), + ADD_TEST(unary_ops_decrement), + ADD_TEST(unary_ops_full), + + ADD_TEST(integer_mul24), + ADD_TEST(integer_mad24), + + ADD_TEST(long_math), + ADD_TEST(long_logic), + ADD_TEST(long_shift), + ADD_TEST(long_compare), + + ADD_TEST(ulong_math), + ADD_TEST(ulong_logic), + ADD_TEST(ulong_shift), + ADD_TEST(ulong_compare), + + ADD_TEST(int_math), + ADD_TEST(int_logic), + ADD_TEST(int_shift), + ADD_TEST(int_compare), + + ADD_TEST(uint_math), + ADD_TEST(uint_logic), + ADD_TEST(uint_shift), + ADD_TEST(uint_compare), + + ADD_TEST(short_math), + ADD_TEST(short_logic), + ADD_TEST(short_shift), + ADD_TEST(short_compare), + + ADD_TEST(ushort_math), + ADD_TEST(ushort_logic), + ADD_TEST(ushort_shift), + ADD_TEST(ushort_compare), + + ADD_TEST(char_math), + ADD_TEST(char_logic), + ADD_TEST(char_shift), + ADD_TEST(char_compare), + + ADD_TEST(uchar_math), + ADD_TEST(uchar_logic), + ADD_TEST(uchar_shift), + ADD_TEST(uchar_compare), + + ADD_TEST(popcount), // Quick - ADD_TEST( quick_long_math ), - ADD_TEST( quick_long_logic ), - ADD_TEST( quick_long_shift ), - ADD_TEST( quick_long_compare ), - - ADD_TEST( quick_ulong_math ), - ADD_TEST( quick_ulong_logic ), - ADD_TEST( quick_ulong_shift ), - ADD_TEST( quick_ulong_compare ), - - ADD_TEST( quick_int_math ), - ADD_TEST( quick_int_logic ), - ADD_TEST( quick_int_shift ), - ADD_TEST( quick_int_compare ), - - ADD_TEST( quick_uint_math ), - ADD_TEST( quick_uint_logic ), - ADD_TEST( quick_uint_shift ), - ADD_TEST( quick_uint_compare ), - - ADD_TEST( quick_short_math ), - ADD_TEST( quick_short_logic ), - 
ADD_TEST( quick_short_shift ), - ADD_TEST( quick_short_compare ), - - ADD_TEST( quick_ushort_math ), - ADD_TEST( quick_ushort_logic ), - ADD_TEST( quick_ushort_shift ), - ADD_TEST( quick_ushort_compare ), - - ADD_TEST( quick_char_math ), - ADD_TEST( quick_char_logic ), - ADD_TEST( quick_char_shift ), - ADD_TEST( quick_char_compare ), - - ADD_TEST( quick_uchar_math ), - ADD_TEST( quick_uchar_logic ), - ADD_TEST( quick_uchar_shift ), - ADD_TEST( quick_uchar_compare ), - - ADD_TEST( vector_scalar ), + ADD_TEST(quick_long_math), + ADD_TEST(quick_long_logic), + ADD_TEST(quick_long_shift), + ADD_TEST(quick_long_compare), + + ADD_TEST(quick_ulong_math), + ADD_TEST(quick_ulong_logic), + ADD_TEST(quick_ulong_shift), + ADD_TEST(quick_ulong_compare), + + ADD_TEST(quick_int_math), + ADD_TEST(quick_int_logic), + ADD_TEST(quick_int_shift), + ADD_TEST(quick_int_compare), + + ADD_TEST(quick_uint_math), + ADD_TEST(quick_uint_logic), + ADD_TEST(quick_uint_shift), + ADD_TEST(quick_uint_compare), + + ADD_TEST(quick_short_math), + ADD_TEST(quick_short_logic), + ADD_TEST(quick_short_shift), + ADD_TEST(quick_short_compare), + + ADD_TEST(quick_ushort_math), + ADD_TEST(quick_ushort_logic), + ADD_TEST(quick_ushort_shift), + ADD_TEST(quick_ushort_compare), + + ADD_TEST(quick_char_math), + ADD_TEST(quick_char_logic), + ADD_TEST(quick_char_shift), + ADD_TEST(quick_char_compare), + + ADD_TEST(quick_uchar_math), + ADD_TEST(quick_uchar_logic), + ADD_TEST(quick_uchar_shift), + ADD_TEST(quick_uchar_compare), + + ADD_TEST(vector_scalar), + + ADD_TEST(integer_dot_product), }; -const int test_num = ARRAY_SIZE( test_list ); +const int test_num = ARRAY_SIZE(test_list); void fill_test_values( cl_long *outBufferA, cl_long *outBufferB, size_t numElements, MTdata d ) { diff --git a/test_conformance/integer_ops/procs.h b/test_conformance/integer_ops/procs.h index d5b77e70..82311fb9 100644 --- a/test_conformance/integer_ops/procs.h +++ b/test_conformance/integer_ops/procs.h @@ -141,3 +141,5 @@ extern int 
test_unary_ops_decrement(cl_device_id deviceID, cl_context context, c extern int test_vector_scalar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_integer_dot_product(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); diff --git a/test_conformance/integer_ops/test_integer_dot_product.cpp b/test_conformance/integer_ops/test_integer_dot_product.cpp new file mode 100644 index 00000000..b5378ae0 --- /dev/null +++ b/test_conformance/integer_ops/test_integer_dot_product.cpp @@ -0,0 +1,380 @@ +// +// Copyright (c) 2021 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// This is needed for std::numeric_limits<>::min() and max() to work on Windows. +#if defined(_WIN32) +#define NOMINMAX +#endif + +#include +#include +#include +#include +#include + +#include "procs.h" +#include "harness/integer_ops_test_info.h" +#include "harness/testHarness.h" + +template +static void +calculate_reference(std::vector& ref, const std::vector& a, + const std::vector& b, const bool AccSat = false, + const std::vector& acc = {}) +{ + assert(a.size() == b.size()); + assert(AccSat == false || acc.size() == a.size() / N); + + ref.resize(a.size() / N); + for (size_t r = 0; r < ref.size(); r++) + { + cl_long result = AccSat ? acc[r] : 0; + for (size_t c = 0; c < N; c++) + { + // OK to assume no overflow? 
+ result += a[r * N + c] * b[r * N + c]; + } + if (AccSat && result > std::numeric_limits::max()) + { + result = std::numeric_limits::max(); + } + ref[r] = static_cast(result); + } +} + +template +void generate_inputs_with_special_values(std::vector& a, + std::vector& b) +{ + const std::vector specialValuesA( + { static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::min() + 1), + static_cast(std::numeric_limits::min() / 2), 0, + static_cast(std::numeric_limits::max() / 2), + static_cast(std::numeric_limits::max() - 1), + static_cast(std::numeric_limits::max()) }); + const std::vector specialValuesB( + { static_cast(std::numeric_limits::min()), + static_cast(std::numeric_limits::min() + 1), + static_cast(std::numeric_limits::min() / 2), 0, + static_cast(std::numeric_limits::max() / 2), + static_cast(std::numeric_limits::max() - 1), + static_cast(std::numeric_limits::max()) }); + + size_t count = 0; + for (auto svA : specialValuesA) + { + for (auto svB : specialValuesB) + { + a[count] = svA; + b[count] = svB; + ++count; + } + } + + // Generate random data for the rest of the inputs: + MTdataHolder d(gRandomSeed); + generate_random_data(TestInfo::explicitType, a.size() - count, d, + a.data() + count); + generate_random_data(TestInfo::explicitType, b.size() - count, d, + b.data() + count); +} + +template +void generate_acc_sat_inputs(std::vector& acc) +{ + // First generate random data: + fill_vector_with_random_data(acc); + + // Now go through the generated data, and make every other element large. + // This ensures we have some elements that need saturation. 
+ for (size_t i = 0; i < acc.size(); i += 2) + { + acc[i] = std::numeric_limits::max() - acc[i]; + } +} + +template struct PackedTestInfo +{ + static constexpr const char* deviceTypeName = "UNSUPPORTED"; +}; +template <> struct PackedTestInfo +{ + static constexpr const char* deviceTypeName = "int"; +}; +template <> struct PackedTestInfo +{ + static constexpr const char* deviceTypeName = "uint"; +}; + +static constexpr const char* kernel_source_dot = R"CLC( +__kernel void test_dot(__global DSTTYPE* dst, __global SRCTYPEA* a, __global SRCTYPEB* b) +{ + int index = get_global_id(0); + dst[index] = DOT(a[index], b[index]); +} +)CLC"; + +static constexpr const char* kernel_source_dot_acc_sat = R"CLC( +__kernel void test_dot_acc_sat( + __global DSTTYPE* dst, + __global SRCTYPEA* a, __global SRCTYPEB* b, __global DSTTYPE* acc) +{ + int index = get_global_id(0); + dst[index] = DOT_ACC_SAT(a[index], b[index], acc[index]); +} +)CLC"; + +template +static int test_case_dot(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, bool packed, + bool sat) +{ + log_info(" testing %s = dot%s%s(%s, %s)\n", + std::numeric_limits::is_signed ? "signed" : "unsigned", + sat ? "_acc_sat" : "", packed ? "_packed" : "", + std::numeric_limits::is_signed ? "signed" : "unsigned", + std::numeric_limits::is_signed ? "signed" : "unsigned"); + + cl_int error = CL_SUCCESS; + + clProgramWrapper program; + clKernelWrapper kernel; + + std::string buildOptions; + buildOptions += " -DDSTTYPE="; + buildOptions += TestInfo::deviceTypeName; + buildOptions += " -DSRCTYPEA="; + buildOptions += packed + ? PackedTestInfo::deviceTypeName + : TestInfo::deviceTypeName + std::to_string(N); + buildOptions += " -DSRCTYPEB="; + buildOptions += packed + ? PackedTestInfo::deviceTypeName + : TestInfo::deviceTypeName + std::to_string(N); + std::string packedSuffix; + packedSuffix += std::numeric_limits::is_signed ? "s" : "u"; + packedSuffix += std::numeric_limits::is_signed ? 
"s" : "u"; + packedSuffix += std::numeric_limits::is_signed ? "_int" : "_uint"; + if (sat) + { + buildOptions += packed + ? " -DDOT_ACC_SAT=dot_acc_sat_4x8packed_" + packedSuffix + : " -DDOT_ACC_SAT=dot_acc_sat"; + } + else + { + buildOptions += + packed ? " -DDOT=dot_4x8packed_" + packedSuffix : " -DDOT=dot"; + } + + std::vector a(N * num_elements); + std::vector b(N * num_elements); + generate_inputs_with_special_values(a, b); + + std::vector acc; + if (sat) + { + acc.resize(num_elements); + generate_acc_sat_inputs(acc); + } + + std::vector reference(num_elements); + calculate_reference(reference, a, b, sat, acc); + + const char* source = sat ? kernel_source_dot_acc_sat : kernel_source_dot; + const char* name = sat ? "test_dot_acc_sat" : "test_dot"; + error = create_single_kernel_helper(context, &program, &kernel, 1, &source, + name, buildOptions.c_str()); + test_error(error, "Unable to create test kernel"); + + clMemWrapper dst = clCreateBuffer( + context, 0, reference.size() * sizeof(DstType), NULL, &error); + test_error(error, "Unable to create output buffer"); + + clMemWrapper srcA = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + a.size() * sizeof(SrcTypeA), a.data(), &error); + test_error(error, "Unable to create srcA buffer"); + + clMemWrapper srcB = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + b.size() * sizeof(SrcTypeB), b.data(), &error); + test_error(error, "Unable to create srcB buffer"); + + clMemWrapper srcAcc; + if (sat) + { + srcAcc = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + acc.size() * sizeof(DstType), acc.data(), &error); + test_error(error, "Unable to create acc buffer"); + } + + error = clSetKernelArg(kernel, 0, sizeof(dst), &dst); + test_error(error, "Unable to set output buffer kernel arg"); + + error = clSetKernelArg(kernel, 1, sizeof(srcA), &srcA); + test_error(error, "Unable to set srcA buffer kernel arg"); + + error = clSetKernelArg(kernel, 2, sizeof(srcB), &srcB); + test_error(error, "Unable to set srcB buffer kernel 
arg"); + + if (sat) + { + error = clSetKernelArg(kernel, 3, sizeof(srcAcc), &srcAcc); + test_error(error, "Unable to set acc buffer kernel arg"); + } + + size_t global_work_size[] = { reference.size() }; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, + NULL, 0, NULL, NULL); + test_error(error, "Unable to enqueue test kernel"); + + error = clFinish(queue); + test_error(error, "clFinish failed after test kernel"); + + std::vector results(reference.size(), 99); + error = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, + results.size() * sizeof(DstType), + results.data(), 0, NULL, NULL); + test_error(error, "Unable to read data after test kernel"); + + if (results != reference) + { + log_error("Result buffer did not match reference buffer!\n"); + return TEST_FAIL; + } + + return TEST_PASS; +} + +template +static int test_vectype(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + int result = TEST_PASS; + + typedef typename std::make_signed::type SSrcType; + typedef typename std::make_signed::type SDstType; + + typedef typename std::make_unsigned::type USrcType; + typedef typename std::make_unsigned::type UDstType; + + // dot testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, false); + + // dot_acc_sat testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, false, true); + + return result; +} + +template +static int test_vectype_packed(cl_device_id deviceID, 
cl_context context, + cl_command_queue queue, int num_elements) +{ + int result = TEST_PASS; + + typedef typename std::make_signed::type SSrcType; + typedef typename std::make_signed::type SDstType; + + typedef typename std::make_unsigned::type USrcType; + typedef typename std::make_unsigned::type UDstType; + + // packed dot testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, false); + + // packed dot_acc_sat testing: + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + result |= test_case_dot( + deviceID, context, queue, num_elements, true, true); + + return result; +} + +int test_integer_dot_product(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + if (!is_extension_available(deviceID, "cl_khr_integer_dot_product")) + { + log_info("cl_khr_integer_dot_product is not supported\n"); + return TEST_SKIPPED_ITSELF; + } + + cl_int error = CL_SUCCESS; + int result = TEST_PASS; + + cl_device_integer_dot_product_capabilities_khr dotCaps = 0; + error = clGetDeviceInfo(deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, + sizeof(dotCaps), &dotCaps, NULL); + test_error( + error, + "Unable to query CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR"); + test_assert_error( + dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR, + "When cl_khr_integer_dot_product is supported " + "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR must be " + "supported"); + + if (dotCaps + & ~(CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR + | 
CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR)) + { + log_info("NOTE: found an unknown / untested capability!\n"); + } + + if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR) + { + result |= test_vectype(deviceID, context, queue, + num_elements); + } + + if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR) + { + result |= test_vectype_packed( + deviceID, context, queue, num_elements); + } + + return result; +} -- cgit v1.2.3 From 39fdb462be7ea4bf2c2b2c6d23e84a70c3def78d Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Sat, 28 Aug 2021 02:21:34 -0700 Subject: define NOMINMAX in the CMakefile to fix std::min and std::max on MSVC (#1308) --- CMakeLists.txt | 2 ++ test_common/harness/kernelHelpers.cpp | 2 +- test_common/harness/os_helpers.cpp | 3 --- test_conformance/basic/test_async_copy2D.cpp | 4 ++-- test_conformance/basic/test_async_copy3D.cpp | 4 ++-- test_conformance/integer_ops/test_integer_dot_product.cpp | 5 ----- 6 files changed, 7 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a614649f..04551dfb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,6 +115,8 @@ endif() if(MSVC) # Don't warn when using standard non-secure functions. add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + # Fix std::min and std::max handling with windows.harness. 
+ add_compile_definitions(NOMINMAX) endif() if( WIN32 AND "${CMAKE_CXX_COMPILER_ID}" MATCHES "Intel" ) diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp index 18f51cbe..1d1f8d8c 100644 --- a/test_common/harness/kernelHelpers.cpp +++ b/test_common/harness/kernelHelpers.cpp @@ -1707,7 +1707,7 @@ Version get_max_OpenCL_C_for_context(cl_context context) else { current_version = - (std::min)(device_version, current_version); + std::min(device_version, current_version); } }); return current_version; diff --git a/test_common/harness/os_helpers.cpp b/test_common/harness/os_helpers.cpp index daf21958..8fc91108 100644 --- a/test_common/harness/os_helpers.cpp +++ b/test_common/harness/os_helpers.cpp @@ -333,9 +333,6 @@ std::string exe_dir() #include -#if defined(max) -#undef max -#endif #include #include diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp index 9fbdcb6e..fafcac83 100644 --- a/test_conformance/basic/test_async_copy2D.cpp +++ b/test_conformance/basic/test_async_copy2D.cpp @@ -203,13 +203,13 @@ int test_copy2D(cl_device_id deviceID, cl_context context, / (numElementsPerLine + srcStride); size_t maxTotalLinesOut = (max_alloc_size / elementSize + dstStride) / (numElementsPerLine + dstStride); - size_t maxTotalLines = (std::min)(maxTotalLinesIn, maxTotalLinesOut); + size_t maxTotalLines = std::min(maxTotalLinesIn, maxTotalLinesOut); size_t maxLocalWorkgroups = maxTotalLines / (localWorkgroupSize * lineCopiesPerWorkItem); size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem - (localIsDst ? 
dstStride : srcStride); - size_t numberOfLocalWorkgroups = (std::min)(1111, (int)maxLocalWorkgroups); + size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups); size_t totalLines = numberOfLocalWorkgroups * localWorkgroupSize * lineCopiesPerWorkItem; size_t inBufferSize = elementSize diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp index 252159bc..2b184ee5 100644 --- a/test_conformance/basic/test_async_copy3D.cpp +++ b/test_conformance/basic/test_async_copy3D.cpp @@ -230,13 +230,13 @@ int test_copy3D(cl_device_id deviceID, cl_context context, size_t maxTotalPlanesOut = ((max_alloc_size / elementSize) + dstPlaneStride) / ((numLines * numElementsPerLine + numLines * dstLineStride) + dstPlaneStride); - size_t maxTotalPlanes = (std::min)(maxTotalPlanesIn, maxTotalPlanesOut); + size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut); size_t maxLocalWorkgroups = maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem); size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem - (localIsDst ? dstPlaneStride : srcPlaneStride); - size_t numberOfLocalWorkgroups = (std::min)(1111, (int)maxLocalWorkgroups); + size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups); size_t totalPlanes = numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem; size_t inBufferSize = elementSize diff --git a/test_conformance/integer_ops/test_integer_dot_product.cpp b/test_conformance/integer_ops/test_integer_dot_product.cpp index b5378ae0..be25b320 100644 --- a/test_conformance/integer_ops/test_integer_dot_product.cpp +++ b/test_conformance/integer_ops/test_integer_dot_product.cpp @@ -14,11 +14,6 @@ // limitations under the License. // -// This is needed for std::numeric_limits<>::min() and max() to work on Windows. 
-#if defined(_WIN32) -#define NOMINMAX -#endif - #include #include #include -- cgit v1.2.3 From 7cfd3a6033f547905da40c06fae32b9337df0b03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Sun, 29 Aug 2021 23:12:52 +0100 Subject: Report failures in simple_{read,write}_image_pitch tests (#1309) --- test_conformance/basic/test_simple_image_pitch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test_conformance/basic/test_simple_image_pitch.cpp b/test_conformance/basic/test_simple_image_pitch.cpp index 1cd82b6f..2eb43b3a 100644 --- a/test_conformance/basic/test_simple_image_pitch.cpp +++ b/test_conformance/basic/test_simple_image_pitch.cpp @@ -83,7 +83,7 @@ int test_simple_read_image_pitch(cl_device_id device, cl_context cl_context_, cl free(host_image); free(host_buffer); - return CL_SUCCESS; + return errors == 0 ? TEST_PASS : TEST_FAIL; } int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, cl_command_queue q, int num_elements) @@ -149,5 +149,5 @@ int test_simple_write_image_pitch(cl_device_id device, cl_context cl_context_, c free(host_image); free(host_buffer); - return CL_SUCCESS; + return errors == 0 ? TEST_PASS : TEST_FAIL; } -- cgit v1.2.3 From e27a97fbd81b6b426a29857a3e1c04d37255931c Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Tue, 31 Aug 2021 16:53:55 +0200 Subject: Add cl_khr_integer_dot_product to known extensions in test compiler. 
(#1316) --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index a1d8d8bd..de30e06b 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -50,6 +50,7 @@ const char *known_extensions[] = { "cl_khr_subgroup_shuffle_relative", "cl_khr_subgroup_clustered_reduce", "cl_khr_extended_bit_ops", + "cl_khr_integer_dot_product", // API-only extensions after this point. If you add above here, modify // first_API_extension below. "cl_khr_icd", @@ -77,7 +78,7 @@ const char *known_extensions[] = { }; size_t num_known_extensions = sizeof(known_extensions) / sizeof(char *); -size_t first_API_extension = 28; +size_t first_API_extension = 29; const char *known_embedded_extensions[] = { "cles_khr_int64", -- cgit v1.2.3 From 995c7dbfbbb7b38c4ad6ce59d66b01b53ef031b2 Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 31 Aug 2021 11:44:17 -0700 Subject: suppress MSVC strdup warning (#1314) --- CMakeLists.txt | 2 ++ test_common/CMakeLists.txt | 5 ----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 04551dfb..7b307a11 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,6 +115,8 @@ endif() if(MSVC) # Don't warn when using standard non-secure functions. add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + # Don't warn about using the portable "strdup" function. + add_compile_definitions(_CRT_NONSTDC_NO_DEPRECATE) # Fix std::min and std::max handling with windows.harness. 
add_compile_definitions(NOMINMAX) endif() diff --git a/test_common/CMakeLists.txt b/test_common/CMakeLists.txt index 61580300..b0505345 100644 --- a/test_common/CMakeLists.txt +++ b/test_common/CMakeLists.txt @@ -21,8 +21,3 @@ set(HARNESS_SOURCES ) add_library(harness STATIC ${HARNESS_SOURCES}) - -if(MSVC) - # Don't warn about using the portable "strdup" function. - target_compile_definitions(harness PRIVATE _CRT_NONSTDC_NO_DEPRECATE) -endif() \ No newline at end of file -- cgit v1.2.3 From 0601c6f7658c80af50d6f6a2ac947682d75bcd50 Mon Sep 17 00:00:00 2001 From: James Price Date: Tue, 31 Aug 2021 14:45:24 -0400 Subject: Add missing include for gRandomSeed (#1307) --- test_common/harness/integer_ops_test_info.h | 1 + 1 file changed, 1 insertion(+) diff --git a/test_common/harness/integer_ops_test_info.h b/test_common/harness/integer_ops_test_info.h index c25843dd..ad7b303b 100644 --- a/test_common/harness/integer_ops_test_info.h +++ b/test_common/harness/integer_ops_test_info.h @@ -18,6 +18,7 @@ #define INTEGER_OPS_TEST_INFO_H #include "conversions.h" +#include "testHarness.h" // TODO: expand usage to other tests. 
-- cgit v1.2.3 From 34e47322db205d3c8c972ddebbf51bb4122e45f5 Mon Sep 17 00:00:00 2001 From: "Senran (Stephen) Zhang" Date: Tue, 7 Sep 2021 00:14:36 +0800 Subject: Limit workgroup size for atomics tests (#1197) * Limit workgroup size for atomics tests This avoids extremely large local buffer size and slow run * Always limit workgroup size --- test_conformance/atomics/test_atomics.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp index 34b34ed3..c0c01363 100644 --- a/test_conformance/atomics/test_atomics.cpp +++ b/test_conformance/atomics/test_atomics.cpp @@ -200,6 +200,10 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL ); test_error( error, "Unable to obtain max work group size for device and kernel combo" ); + // Limit workSize to avoid extremely large local buffer size and slow + // run. + if (workSize > 65536) workSize = 65536; + // "workSize" is limited to that of the first dimension as only a 1DRange is executed. if( maxSizes[0] < workSize ) { -- cgit v1.2.3 From 1f26e1d8ba372f4c638f9c0cdae7566e349b9b9a Mon Sep 17 00:00:00 2001 From: Jeremy Kemp Date: Tue, 7 Sep 2021 12:47:44 +0100 Subject: Fix memory model issue in `atomic_flag`. (#1283) * Fix memory model issue in atomic_flag. In atomic_flag sub-tests that modify local memory, compilers may re-order memory accesses between the local and global address spaces which can lead to incorrect test failures. This commit ensures that both local and global memory operations are fenced to prevent this re-ordering from occurring. Fixes #134. * Clang format changes. * Added missing global acquire which is necessary for the corresponding global release. Thanks to @jlewis-austin for spotting. * Clang format changes. * Match the condition for applying acquire/release fences. 
--- test_conformance/c11_atomics/test_atomics.cpp | 36 ++++++++++++++++++--------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index c3a190b7..38b4e9a7 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1657,12 +1657,18 @@ public: " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n" " {\n" " bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n"; - if (MemoryOrder() == MEMORY_ORDER_RELAXED || MemoryOrder() == MEMORY_ORDER_RELEASE) - program += " atomic_work_item_fence(" + - std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + - "memory_order_acquire," + - std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? "memory_scope_all_svm_devices" : "memory_scope_device") ) + - ");\n"; + if (MemoryOrder() == MEMORY_ORDER_RELAXED + || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory()) + program += " atomic_work_item_fence(" + + std::string(LocalMemory() + ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " + : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_acquire," + + std::string(LocalMemory() + ? "memory_scope_work_group" + : (UseSVM() ? "memory_scope_all_svm_devices" + : "memory_scope_device")) + + ");\n"; program += " if (!set)\n" @@ -1683,12 +1689,18 @@ public: " stop = 1;\n" " }\n"; - if (MemoryOrder() == MEMORY_ORDER_ACQUIRE || MemoryOrder() == MEMORY_ORDER_RELAXED) - program += " atomic_work_item_fence(" + - std::string(LocalMemory() ? "CLK_LOCAL_MEM_FENCE, " : "CLK_GLOBAL_MEM_FENCE, ") + - "memory_order_release," + - std::string(LocalMemory() ? "memory_scope_work_group" : (UseSVM() ? 
"memory_scope_all_svm_devices" : "memory_scope_device") ) + - ");\n"; + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE + || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory()) + program += " atomic_work_item_fence(" + + std::string(LocalMemory() + ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " + : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_release," + + std::string(LocalMemory() + ? "memory_scope_work_group" + : (UseSVM() ? "memory_scope_all_svm_devices" + : "memory_scope_device")) + + ");\n"; program += " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n" -- cgit v1.2.3 From 02bf24d2b1684b1ffde079d3598a8fc70610d4fc Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 13 Sep 2021 05:25:32 -0700 Subject: remove min max macros (#1310) * remove the MIN and MAX macros and use the std versions instead * fix formatting * fix Arm build * remove additional MIN and MAX macros from compat.h --- test_common/harness/compat.h | 7 --- test_common/harness/errorHelpers.cpp | 20 ++++---- test_common/harness/imageHelpers.cpp | 3 -- .../basic/test_enqueued_local_size.cpp | 8 +-- test_conformance/buffers/test_sub_buffers.cpp | 22 ++++---- test_conformance/conversions/test_conversions.cpp | 7 ++- .../device_execution/enqueue_ndrange.cpp | 3 +- .../device_execution/host_queue_order.cpp | 3 +- test_conformance/half/Test_roundTrip.cpp | 7 ++- test_conformance/half/Test_vLoadHalf.cpp | 7 ++- test_conformance/half/Test_vStoreHalf.cpp | 11 ++-- .../images/kernel_read_write/test_common.cpp | 25 ++++----- .../images/kernel_read_write/test_iterations.cpp | 32 ++++++++---- .../images/kernel_read_write/test_read_1D.cpp | 26 +++++++--- .../kernel_read_write/test_read_1D_array.cpp | 27 ++++++---- .../kernel_read_write/test_read_2D_array.cpp | 32 ++++++++---- test_conformance/integer_ops/test_add_sat.cpp | 31 ++++------- test_conformance/integer_ops/test_integers.cpp | 60 +++++++++++----------- test_conformance/integer_ops/test_sub_sat.cpp | 32 
++++-------- test_conformance/integer_ops/test_unary_ops.cpp | 2 +- .../math_brute_force/macro_binary_double.cpp | 3 +- .../math_brute_force/macro_binary_float.cpp | 3 +- .../math_brute_force/macro_unary_double.cpp | 3 +- .../math_brute_force/macro_unary_float.cpp | 4 +- test_conformance/math_brute_force/main.cpp | 5 +- test_conformance/profiling/execute.cpp | 12 ++--- test_conformance/workgroups/test_wg_broadcast.cpp | 6 ++- .../workgroups/test_wg_scan_exclusive_max.cpp | 11 ++-- .../workgroups/test_wg_scan_exclusive_min.cpp | 11 ++-- .../workgroups/test_wg_scan_inclusive_max.cpp | 10 ++-- .../workgroups/test_wg_scan_inclusive_min.cpp | 10 ++-- 31 files changed, 241 insertions(+), 202 deletions(-) diff --git a/test_common/harness/compat.h b/test_common/harness/compat.h index 3b557852..4053b7ee 100644 --- a/test_common/harness/compat.h +++ b/test_common/harness/compat.h @@ -309,13 +309,6 @@ EXTERN_C int __builtin_clz(unsigned int pattern); #endif -#ifndef MIN -#define MIN(x, y) (((x) < (y)) ? (x) : (y)) -#endif -#ifndef MAX -#define MAX(x, y) (((x) > (y)) ? (x) : (y)) -#endif - /*----------------------------------------------------------------------------- WARNING: DO NOT USE THESE MACROS: diff --git a/test_common/harness/errorHelpers.cpp b/test_common/harness/errorHelpers.cpp index ea928bc3..eaccf641 100644 --- a/test_common/harness/errorHelpers.cpp +++ b/test_common/harness/errorHelpers.cpp @@ -18,6 +18,8 @@ #include #include +#include + #include "errorHelpers.h" #include "parseParameters.h" @@ -301,10 +303,6 @@ const char *GetQueuePropertyName(cl_command_queue_properties property) } } -#ifndef MAX -#define MAX(_a, _b) ((_a) > (_b) ? 
(_a) : (_b)) -#endif - #if defined(_MSC_VER) #define scalbnf(_a, _i) ldexpf(_a, _i) #define scalbn(_a, _i) ldexp(_a, _i) @@ -357,7 +355,7 @@ static float Ulp_Error_Half_Float(float test, double reference) // The unbiased exponent of the ulp unit place int ulp_exp = - HALF_MANT_DIG - 1 - MAX(ilogb(reference), HALF_MIN_EXP - 1); + HALF_MANT_DIG - 1 - std::max(ilogb(reference), HALF_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -365,7 +363,7 @@ static float Ulp_Error_Half_Float(float test, double reference) // reference is a normal power of two or a zero int ulp_exp = - HALF_MANT_DIG - 1 - MAX(ilogb(reference) - 1, HALF_MIN_EXP - 1); + HALF_MANT_DIG - 1 - std::max(ilogb(reference) - 1, HALF_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -437,7 +435,8 @@ float Ulp_Error(float test, double reference) return 0.0f; // if we are expecting a NaN, any NaN is fine // The unbiased exponent of the ulp unit place - int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference), FLT_MIN_EXP - 1); + int ulp_exp = + FLT_MANT_DIG - 1 - std::max(ilogb(reference), FLT_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -445,7 +444,8 @@ float Ulp_Error(float test, double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place - int ulp_exp = FLT_MANT_DIG - 1 - MAX(ilogb(reference) - 1, FLT_MIN_EXP - 1); + int ulp_exp = + FLT_MANT_DIG - 1 - std::max(ilogb(reference) - 1, FLT_MIN_EXP - 1); // Scale the exponent of the error return (float)scalbn(testVal - reference, ulp_exp); @@ -513,7 +513,7 @@ float Ulp_Error_Double(double test, long double reference) // The unbiased exponent of the ulp unit place int ulp_exp = - DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1); + DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1); // Scale the exponent of the error float 
result = (float)scalbnl(testVal - reference, ulp_exp); @@ -529,7 +529,7 @@ float Ulp_Error_Double(double test, long double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place int ulp_exp = - DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1); + DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1); // Scale the exponent of the error float result = (float)scalbnl(testVal - reference, ulp_exp); diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index 314709f8..3a5c5533 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -690,9 +690,6 @@ int has_alpha(const cl_image_format *format) _b ^= _a; \ _a ^= _b; \ } while (0) -#ifndef MAX -#define MAX(_a, _b) ((_a) > (_b) ? (_a) : (_b)) -#endif void get_max_sizes( size_t *numberOfSizes, const int maxNumberOfSizes, size_t sizes[][3], diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp index f52162a8..91fe1434 100644 --- a/test_conformance/basic/test_enqueued_local_size.cpp +++ b/test_conformance/basic/test_enqueued_local_size.cpp @@ -14,13 +14,15 @@ // limitations under the License. 
// #include "harness/compat.h" +#include "harness/rounding_mode.h" #include #include #include #include #include -#include "harness/rounding_mode.h" + +#include #include "procs.h" @@ -124,8 +126,8 @@ test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_que err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_wgs), &max_wgs, NULL); test_error( err, "clGetDeviceInfo failed."); - localsize[0] = MIN(16, max_wgs); - localsize[1] = MIN(11, max_wgs / localsize[0]); + localsize[0] = std::min(16, max_wgs); + localsize[1] = std::min(11, max_wgs / localsize[0]); // If we need to use uniform workgroups because non-uniform workgroups are // not supported, round up to the next global size that is divisible by the // local size. diff --git a/test_conformance/buffers/test_sub_buffers.cpp b/test_conformance/buffers/test_sub_buffers.cpp index 691509fd..d6ab111e 100644 --- a/test_conformance/buffers/test_sub_buffers.cpp +++ b/test_conformance/buffers/test_sub_buffers.cpp @@ -15,6 +15,8 @@ // #include "procs.h" +#include + // Design: // To test sub buffers, we first create one main buffer. We then create several sub-buffers and // queue Actions on each one. Each Action is encapsulated in a class so it can keep track of @@ -101,13 +103,6 @@ public: } }; -#ifndef MAX -#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) -#endif -#ifndef MIN -#define MIN( _a, _b ) ( (_a) < (_b) ? 
(_a) : (_b) ) -#endif - class CopyAction : public Action { public: @@ -117,7 +112,8 @@ public: virtual cl_int Execute( cl_context context, cl_command_queue queue, cl_char tag, SubBufferWrapper &buffer1, SubBufferWrapper &buffer2, cl_char *parentBufferState ) { // Copy from sub-buffer 1 to sub-buffer 2 - size_t size = get_random_size_t( 0, MIN( buffer1.mSize, buffer2.mSize ), GetRandSeed() ); + size_t size = get_random_size_t( + 0, std::min(buffer1.mSize, buffer2.mSize), GetRandSeed()); size_t startOffset = get_random_size_t( 0, buffer1.mSize - size, GetRandSeed() ); size_t endOffset = get_random_size_t( 0, buffer2.mSize - size, GetRandSeed() ); @@ -266,7 +262,11 @@ int test_sub_buffers_read_write_core( cl_context context, cl_command_queue queue endRange = mainSize; size_t offset = get_random_size_t( toStartFrom / addressAlign, endRange / addressAlign, Action::GetRandSeed() ) * addressAlign; - size_t size = get_random_size_t( 1, ( MIN( mainSize / 8, mainSize - offset ) ) / addressAlign, Action::GetRandSeed() ) * addressAlign; + size_t size = + get_random_size_t( + 1, (std::min(mainSize / 8, mainSize - offset)) / addressAlign, + Action::GetRandSeed()) + * addressAlign; error = subBuffers[ numSubBuffers ].Allocate( mainBuffer, CL_MEM_READ_WRITE, offset, size ); test_error( error, "Unable to allocate sub buffer" ); @@ -443,7 +443,7 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context error = get_reasonable_buffer_size( otherDevice, maxBuffer2 ); test_error( error, "Unable to get buffer size for secondary device" ); - maxBuffer1 = MIN( maxBuffer1, maxBuffer2 ); + maxBuffer1 = std::min(maxBuffer1, maxBuffer2); cl_uint addressAlign1Bits, addressAlign2Bits; error = clGetDeviceInfo( deviceID, CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof( addressAlign1Bits ), &addressAlign1Bits, NULL ); @@ -452,7 +452,7 @@ int test_sub_buffers_read_write_dual_devices( cl_device_id deviceID, cl_context error = clGetDeviceInfo( otherDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN, 
sizeof( addressAlign2Bits ), &addressAlign2Bits, NULL ); test_error( error, "Unable to get secondary device's address alignment" ); - cl_uint addressAlign1 = MAX( addressAlign1Bits, addressAlign2Bits ) / 8; + cl_uint addressAlign1 = std::max(addressAlign1Bits, addressAlign2Bits) / 8; // Finally time to run! return test_sub_buffers_read_write_core( testingContext, queue1, queue2, maxBuffer1, addressAlign1 ); diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 87b8ead7..e8e572e6 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -47,6 +47,8 @@ #endif #include +#include + #include "Sleep.h" #include "basic_test_conversions.h" @@ -1003,7 +1005,8 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod uint64_t i; gTestCount++; - size_t blockCount = BUFFER_SIZE / MAX( gTypeSizes[ inType ], gTypeSizes[ outType ] ); + size_t blockCount = + BUFFER_SIZE / std::max(gTypeSizes[inType], gTypeSizes[outType]); size_t step = blockCount; uint64_t lastCase = 1ULL << (8*gTypeSizes[ inType ]); cl_event writeInputBuffer = NULL; @@ -1078,7 +1081,7 @@ static int DoTest( cl_device_id device, Type outType, Type inType, SaturationMod fflush(stdout); } - cl_uint count = (uint32_t) MIN( blockCount, lastCase - i ); + cl_uint count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); writeInputBufferInfo.count = count; // Crate a user event to represent the status of the reference value computation completion diff --git a/test_conformance/device_execution/enqueue_ndrange.cpp b/test_conformance/device_execution/enqueue_ndrange.cpp index 8ced6629..f228f063 100644 --- a/test_conformance/device_execution/enqueue_ndrange.cpp +++ b/test_conformance/device_execution/enqueue_ndrange.cpp @@ -18,6 +18,7 @@ #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include #include #include "procs.h" @@ -645,7 +646,7 @@ int 
test_enqueue_ndrange(cl_device_id device, cl_context context, cl_command_que max_local_size = (max_local_size > MAX_GWS)? MAX_GWS: max_local_size; if(gWimpyMode) { - max_local_size = MIN(8, max_local_size); + max_local_size = std::min((size_t)8, max_local_size); } cl_uint num = 10; diff --git a/test_conformance/device_execution/host_queue_order.cpp b/test_conformance/device_execution/host_queue_order.cpp index 2b5688d1..5376ea40 100644 --- a/test_conformance/device_execution/host_queue_order.cpp +++ b/test_conformance/device_execution/host_queue_order.cpp @@ -18,6 +18,7 @@ #include "harness/testHarness.h" #include "harness/typeWrappers.h" +#include #include #include "procs.h" @@ -124,7 +125,7 @@ int test_host_queue_order(cl_device_id device, cl_context context, cl_command_qu cl_uint num = arr_size(result); if( gWimpyMode ) { - num = MAX(num / 16, 4); + num = std::max(num / 16, 4U); } clMemWrapper res_mem; diff --git a/test_conformance/half/Test_roundTrip.cpp b/test_conformance/half/Test_roundTrip.cpp index 69fc7e41..1ab40937 100644 --- a/test_conformance/half/Test_roundTrip.cpp +++ b/test_conformance/half/Test_roundTrip.cpp @@ -14,6 +14,9 @@ // limitations under the License. 
// #include + +#include + #include "cl_utils.h" #include "tests.h" #include "harness/testHarness.h" @@ -156,7 +159,7 @@ int test_roundTrip( cl_device_id device, cl_context context, cl_command_queue qu } // Figure out how many elements are in a work block - size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); + size_t elementSize = std::max(sizeof(cl_half), sizeof(cl_float)); size_t blockCount = (size_t)getBufferSize(device) / elementSize; //elementSize is a power of two uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of cl_half size_t stride = blockCount; @@ -168,7 +171,7 @@ int test_roundTrip( cl_device_id device, cl_context context, cl_command_queue qu for( i = 0; i < (uint64_t)lastCase; i += stride ) { - count = (uint32_t) MIN( blockCount, lastCase - i ); + count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); //Init the input stream uint16_t *p = (uint16_t *)gIn_half; diff --git a/test_conformance/half/Test_vLoadHalf.cpp b/test_conformance/half/Test_vLoadHalf.cpp index 5dfac7a3..e9354019 100644 --- a/test_conformance/half/Test_vLoadHalf.cpp +++ b/test_conformance/half/Test_vLoadHalf.cpp @@ -17,6 +17,9 @@ #include "harness/testHarness.h" #include + +#include + #include "cl_utils.h" #include "tests.h" @@ -429,7 +432,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) } // Figure out how many elements are in a work block - size_t elementSize = MAX( sizeof(cl_half), sizeof(cl_float)); + size_t elementSize = std::max(sizeof(cl_half), sizeof(cl_float)); size_t blockCount = getBufferSize(device) / elementSize; // elementSize is power of 2 uint64_t lastCase = 1ULL << (8*sizeof(cl_half)); // number of things of size cl_half @@ -447,7 +450,7 @@ int Test_vLoadHalf_private( cl_device_id device, bool aligned ) for( i = 0; i < (uint64_t)lastCase; i += blockCount ) { - count = (uint32_t) MIN( blockCount, lastCase - i ); + count = (uint32_t)std::min((uint64_t)blockCount, lastCase - i); //Init the input stream uint16_t *p = 
(uint16_t *)gIn_half; diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index c3a328ad..85824a9f 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -18,6 +18,9 @@ #include "harness/testHarness.h" #include + +#include + #include "cl_utils.h" #include "tests.h" @@ -674,7 +677,7 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR } // end for vector size // Figure out how many elements are in a work block - size_t elementSize = MAX( sizeof(cl_ushort), sizeof(float)); + size_t elementSize = std::max(sizeof(cl_ushort), sizeof(float)); size_t blockCount = BUFFER_SIZE / elementSize; // elementSize is power of 2 uint64_t lastCase = 1ULL << (8*sizeof(float)); // number of floats. size_t stride = blockCount; @@ -726,7 +729,7 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR for( i = 0; i < lastCase; i += stride ) { - count = (cl_uint) MIN( blockCount, lastCase - i ); + count = (cl_uint)std::min((uint64_t)blockCount, lastCase - i); fref.i = i; dref.i = i; @@ -1272,7 +1275,7 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double } // Figure out how many elements are in a work block - size_t elementSize = MAX( sizeof(cl_ushort), sizeof(float)); + size_t elementSize = std::max(sizeof(cl_ushort), sizeof(float)); size_t blockCount = BUFFER_SIZE / elementSize; uint64_t lastCase = 1ULL << (8*sizeof(float)); size_t stride = blockCount; @@ -1323,7 +1326,7 @@ int Test_vStoreaHalf_private( cl_device_id device, f2h referenceFunc, d2h double for( i = 0; i < (uint64_t)lastCase; i += stride ) { - count = (cl_uint) MIN( blockCount, lastCase - i ); + count = (cl_uint)std::min((uint64_t)blockCount, lastCase - i); fref.i = i; dref.i = i; diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index 375ee587..6b3cf849 100644 
--- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -16,6 +16,7 @@ #include "test_common.h" +#include cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool test_mipmaps, cl_int *error) { cl_sampler sampler = nullptr; @@ -934,13 +935,13 @@ int test_read_image(cl_context context, cl_command_queue queue, { err4 = 0.0f; } - float maxErr1 = MAX( + float maxErr1 = std::max( maxErr * maxPixel.p[0], FLT_MIN); - float maxErr2 = MAX( + float maxErr2 = std::max( maxErr * maxPixel.p[1], FLT_MIN); - float maxErr3 = MAX( + float maxErr3 = std::max( maxErr * maxPixel.p[2], FLT_MIN); - float maxErr4 = MAX( + float maxErr4 = std::max( maxErr * maxPixel.p[3], FLT_MIN); if (!(err1 <= maxErr1) @@ -1039,17 +1040,17 @@ int test_read_image(cl_context context, cl_command_queue queue, float err4 = ABS_ERROR(resultPtr[3], expected[3]); float maxErr1 = - MAX(maxErr * maxPixel.p[0], - FLT_MIN); + std::max(maxErr * maxPixel.p[0], + FLT_MIN); float maxErr2 = - MAX(maxErr * maxPixel.p[1], - FLT_MIN); + std::max(maxErr * maxPixel.p[1], + FLT_MIN); float maxErr3 = - MAX(maxErr * maxPixel.p[2], - FLT_MIN); + std::max(maxErr * maxPixel.p[2], + FLT_MIN); float maxErr4 = - MAX(maxErr * maxPixel.p[3], - FLT_MIN); + std::max(maxErr * maxPixel.p[3], + FLT_MIN); if (!(err1 <= maxErr1) diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp index 03ca9595..3b779fab 100644 --- a/test_conformance/images/kernel_read_write/test_iterations.cpp +++ b/test_conformance/images/kernel_read_write/test_iterations.cpp @@ -16,6 +16,8 @@ #include "test_common.h" #include +#include + #if defined( __APPLE__ ) #include #include @@ -434,7 +436,8 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl float err1 = ABS_ERROR(resultPtr[0], expected[0]); // Clamp to the minimum absolute error for the format if (err1 > 
0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); // Check if the result matches. if( ! (err1 <= maxErr1) ) @@ -484,7 +487,8 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl imageSampler, expected, 0, &containsDenormals ); float err1 = ABS_ERROR(resultPtr[0], expected[0]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); if( ! (err1 <= maxErr1) ) @@ -598,10 +602,14 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); // Check if the result matches. if( ! (err1 <= maxErr1) || ! 
(err2 <= maxErr2) || @@ -671,10 +679,14 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form float err2 = ABS_ERROR(resultPtr[1], expected[1]); float err3 = ABS_ERROR(resultPtr[2], expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp index c9ba4e84..68113f9a 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp @@ -17,6 +17,8 @@ #include "test_common.h" #include +#include + #if defined( __APPLE__ ) #include #include @@ -669,10 +671,14 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); // Check if the 
result matches. if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || @@ -732,10 +738,14 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke ABS_ERROR(resultPtr[2], expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp index b3287ded..ac266ad7 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp @@ -16,13 +16,14 @@ #include "test_common.h" #include +#include + #if defined( __APPLE__ ) #include #include #include #endif - const char *read1DArrayKernelSourcePattern = "__kernel void sample_kernel( read_only image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n" "{\n" @@ -772,10 +773,14 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * 
maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); // Check if the result matches. if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || @@ -838,10 +843,14 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker ABS_ERROR(resultPtr[2], expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = + std::max(maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = + std::max(maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = + std::max(maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = + std::max(maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp index 7cb334b2..11b78814 100644 --- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp @@ -16,6 +16,8 @@ #include "test_common.h" #include +#include + // Utility function to clamp down image sizes for certain tests to avoid // using too much memory. static size_t reduceImageSizeRange(size_t maxDimSize) { @@ -617,7 +619,8 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker ABS_ERROR(resultPtr[0], expected[0]); // Clamp to the minimum absolute error for the format if (err1 > 0 && err1 < formatAbsoluteError) { err1 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); if( ! 
(err1 <= maxErr1) ) { @@ -661,7 +664,8 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker float err1 = ABS_ERROR(resultPtr[0], expected[0]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); if( ! (err1 <= maxErr1) ) @@ -942,10 +946,14 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker if (err2 > 0 && err2 < formatAbsoluteError) { err2 = 0.0f; } if (err3 > 0 && err3 < formatAbsoluteError) { err3 = 0.0f; } if (err4 > 0 && err4 < formatAbsoluteError) { err4 = 0.0f; } - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = std::max( + maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = std::max( + maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = std::max( + maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! (err4 <= maxErr4) ) { @@ -1004,10 +1012,14 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker expected[2]); float err4 = ABS_ERROR(resultPtr[3], expected[3]); - float maxErr1 = MAX( maxErr * maxPixel.p[0], FLT_MIN ); - float maxErr2 = MAX( maxErr * maxPixel.p[1], FLT_MIN ); - float maxErr3 = MAX( maxErr * maxPixel.p[2], FLT_MIN ); - float maxErr4 = MAX( maxErr * maxPixel.p[3], FLT_MIN ); + float maxErr1 = std::max( + maxErr * maxPixel.p[0], FLT_MIN); + float maxErr2 = std::max( + maxErr * maxPixel.p[1], FLT_MIN); + float maxErr3 = std::max( + maxErr * maxPixel.p[2], FLT_MIN); + float maxErr4 = std::max( + maxErr * maxPixel.p[3], FLT_MIN); if( ! (err1 <= maxErr1) || ! (err2 <= maxErr2) || ! (err3 <= maxErr3) || ! 
(err4 <= maxErr4) ) diff --git a/test_conformance/integer_ops/test_add_sat.cpp b/test_conformance/integer_ops/test_add_sat.cpp index c0e45d11..e33f5c67 100644 --- a/test_conformance/integer_ops/test_add_sat.cpp +++ b/test_conformance/integer_ops/test_add_sat.cpp @@ -21,18 +21,9 @@ #include #include -#include "procs.h" - -#define UCHAR_MIN 0 -#define USHRT_MIN 0 -#define UINT_MIN 0 +#include -#ifndef MAX -#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) -#endif -#ifndef MIN -#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) ) -#endif +#include "procs.h" static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize ) { @@ -40,8 +31,8 @@ static int verify_addsat_char( const cl_char *inA, const cl_char *inB, const cl_ for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; - r = MAX( r, CL_CHAR_MIN ); - r = MIN( r, CL_CHAR_MAX ); + r = std::max(r, CL_CHAR_MIN); + r = std::min(r, CL_CHAR_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for add_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -55,9 +46,9 @@ static int verify_addsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const for( i = 0; i < n; i++ ) { cl_int r = (int) inA[i] + (int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_UCHAR_MAX ); - if( r != outptr[i] ) + r = std::max(r, 0); + r = std::min(r, CL_UCHAR_MAX); + if (r != outptr[i]) { log_info( "\n%d) Failure for add_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } } return 0; @@ -69,8 +60,8 @@ static int verify_addsat_short( const cl_short *inA, const cl_short *inB, const for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; - r = MAX( r, CL_SHRT_MIN ); - r = MIN( r, CL_SHRT_MAX ); + r = std::max(r, CL_SHRT_MIN); + r = std::min(r, CL_SHRT_MAX); if( r != outptr[i] ) { 
log_info( "\n%d) Failure for add_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -84,8 +75,8 @@ static int verify_addsat_ushort( const cl_ushort *inA, const cl_ushort *inB, con for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] + (cl_int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_USHRT_MAX ); + r = std::max(r, 0); + r = std::min(r, CL_USHRT_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for add_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } diff --git a/test_conformance/integer_ops/test_integers.cpp b/test_conformance/integer_ops/test_integers.cpp index 8d77b24b..6fa18e1e 100644 --- a/test_conformance/integer_ops/test_integers.cpp +++ b/test_conformance/integer_ops/test_integers.cpp @@ -16,14 +16,9 @@ #include "testBase.h" #include "harness/conversions.h" -#define TEST_SIZE 512 +#include -#ifndef MIN - #define MIN( _a, _b ) ((_a) < (_b) ? (_a) : (_b)) -#endif -#ifndef MAX - #define MAX( _a, _b ) ((_a) > (_b) ? 
(_a) : (_b)) -#endif +#define TEST_SIZE 512 const char *singleParamIntegerKernelSourcePattern = "__kernel void sample_test(__global %s *sourceA, __global %s *destValues)\n" @@ -1512,19 +1507,20 @@ bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *de switch( vecAType ) { case kULong: - ((cl_ulong*) destination)[0] = MAX(MIN(valueA, valueC), valueB); + ((cl_ulong *)destination)[0] = + std::max(std::min(valueA, valueC), valueB); break; case kUInt: - ((cl_uint*) destination)[0] = (cl_uint) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_uint *)destination)[0] = + (cl_uint)(std::max(std::min(valueA, valueC), valueB)); break; case kUShort: - ((cl_ushort*) destination)[0] = (cl_ushort) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_ushort *)destination)[0] = + (cl_ushort)(std::max(std::min(valueA, valueC), valueB)); break; case kUChar: - ((cl_uchar*) destination)[0] = (cl_uchar) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_uchar *)destination)[0] = + (cl_uchar)(std::max(std::min(valueA, valueC), valueB)); break; default: //error -- should never get here @@ -1576,19 +1572,20 @@ bool verify_integer_clamp( void *sourceA, void *sourceB, void *sourceC, void *de switch( vecAType ) { case kLong: - ((cl_long*) destination)[0] = MAX(MIN(valueA, valueC), valueB); + ((cl_long *)destination)[0] = + std::max(std::min(valueA, valueC), valueB); break; case kInt: - ((cl_int*) destination)[0] = (cl_int) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_int *)destination)[0] = + (cl_int)(std::max(std::min(valueA, valueC), valueB)); break; case kShort: - ((cl_short*) destination)[0] = (cl_short) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_short *)destination)[0] = + (cl_short)(std::max(std::min(valueA, valueC), valueB)); break; case kChar: - ((cl_char*) destination)[0] = (cl_char) - (MAX(MIN(valueA, valueC), valueB)); + ((cl_char *)destination)[0] = + (cl_char)(std::max(std::min(valueA, valueC), valueB)); break; default: //error -- should never get here @@ -1654,13 
+1651,16 @@ bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void * ((cl_ulong*) destination)[0] = multLo; break; case kUInt: - ((cl_uint*) destination)[0] = (cl_uint) MIN( multLo, (cl_ulong) CL_UINT_MAX ); + ((cl_uint *)destination)[0] = + (cl_uint)std::min(multLo, (cl_ulong)CL_UINT_MAX); break; case kUShort: - ((cl_ushort*) destination)[0] = (cl_ushort) MIN( multLo, (cl_ulong) CL_USHRT_MAX ); + ((cl_ushort *)destination)[0] = + (cl_ushort)std::min(multLo, (cl_ulong)CL_USHRT_MAX); break; case kUChar: - ((cl_uchar*) destination)[0] = (cl_uchar) MIN( multLo, (cl_ulong) CL_UCHAR_MAX ); + ((cl_uchar *)destination)[0] = + (cl_uchar)std::min(multLo, (cl_ulong)CL_UCHAR_MAX); break; default: //error -- should never get here @@ -1744,18 +1744,18 @@ bool verify_integer_mad_sat( void *sourceA, void *sourceB, void *sourceC, void * ((cl_long*) destination)[0] = result; break; case kInt: - result = MIN( result, (cl_long) CL_INT_MAX ); - result = MAX( result, (cl_long) CL_INT_MIN ); + result = std::min(result, (cl_long)CL_INT_MAX); + result = std::max(result, (cl_long)CL_INT_MIN); ((cl_int*) destination)[0] = (cl_int) result; break; case kShort: - result = MIN( result, (cl_long) CL_SHRT_MAX ); - result = MAX( result, (cl_long) CL_SHRT_MIN ); + result = std::min(result, (cl_long)CL_SHRT_MAX); + result = std::max(result, (cl_long)CL_SHRT_MIN); ((cl_short*) destination)[0] = (cl_short) result; break; case kChar: - result = MIN( result, (cl_long) CL_CHAR_MAX ); - result = MAX( result, (cl_long) CL_CHAR_MIN ); + result = std::min(result, (cl_long)CL_CHAR_MAX); + result = std::max(result, (cl_long)CL_CHAR_MIN); ((cl_char*) destination)[0] = (cl_char) result; break; default: diff --git a/test_conformance/integer_ops/test_sub_sat.cpp b/test_conformance/integer_ops/test_sub_sat.cpp index 845d1064..2a88ee0d 100644 --- a/test_conformance/integer_ops/test_sub_sat.cpp +++ b/test_conformance/integer_ops/test_sub_sat.cpp @@ -21,19 +21,9 @@ #include #include -#include 
"procs.h" - -#define UCHAR_MIN 0 -#define USHRT_MIN 0 -#define UINT_MIN 0 - -#ifndef MAX -#define MAX( _a, _b ) ( (_a) > (_b) ? (_a) : (_b) ) -#endif -#ifndef MIN -#define MIN( _a, _b ) ( (_a) < (_b) ? (_a) : (_b) ) -#endif +#include +#include "procs.h" static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_char *outptr, int n, const char *sizeName, int vecSize ) { @@ -41,8 +31,8 @@ static int verify_subsat_char( const cl_char *inA, const cl_char *inB, const cl_ for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, CL_CHAR_MIN ); - r = MIN( r, CL_CHAR_MAX ); + r = std::max(r, CL_CHAR_MIN); + r = std::min(r, CL_CHAR_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for sub_sat( (char%s) 0x%2.2x, (char%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -56,9 +46,9 @@ static int verify_subsat_uchar( const cl_uchar *inA, const cl_uchar *inB, const for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_UCHAR_MAX ); - if( r != outptr[i] ) + r = std::max(r, 0); + r = std::min(r, CL_UCHAR_MAX); + if (r != outptr[i]) { log_info( "\n%d) Failure for sub_sat( (uchar%s) 0x%2.2x, (uchar%s) 0x%2.2x) = *0x%2.2x vs 0x%2.2x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } } return 0; @@ -70,8 +60,8 @@ static int verify_subsat_short( const cl_short *inA, const cl_short *inB, const for( i = 0; i < n; i++ ) { cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, CL_SHRT_MIN ); - r = MIN( r, CL_SHRT_MAX ); + r = std::max(r, CL_SHRT_MIN); + r = std::min(r, CL_SHRT_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for sub_sat( (short%s) 0x%4.4x, (short%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } @@ -85,8 +75,8 @@ static int verify_subsat_ushort( const cl_ushort *inA, const cl_ushort *inB, con for( i = 0; i < n; i++ ) { 
cl_int r = (cl_int) inA[i] - (cl_int) inB[i]; - r = MAX( r, 0 ); - r = MIN( r, CL_USHRT_MAX ); + r = std::max(r, 0); + r = std::min(r, CL_USHRT_MAX); if( r != outptr[i] ) { log_info( "\n%d) Failure for sub_sat( (ushort%s) 0x%4.4x, (ushort%s) 0x%4.4x) = *0x%4.4x vs 0x%4.4x\n", i, sizeName, inA[i], sizeName, inB[i], r, outptr[i] ); return -1; } diff --git a/test_conformance/integer_ops/test_unary_ops.cpp b/test_conformance/integer_ops/test_unary_ops.cpp index 72940eaa..c91c85ae 100644 --- a/test_conformance/integer_ops/test_unary_ops.cpp +++ b/test_conformance/integer_ops/test_unary_ops.cpp @@ -107,7 +107,7 @@ int test_unary_op( cl_command_queue queue, cl_context context, OpKonstants which // For sub ops, the min control value is 2. Otherwise, it's 0 controlData[ i ] |= 0x02; else if( whichOp == kIncrement ) - // For addition ops, the MAX control value is 1. Otherwise, it's 3 + // For addition ops, the max control value is 1. Otherwise, it's 3 controlData[ i ] &= ~0x02; } streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index 6db6aa56..d3e8071f 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -496,7 +496,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } - for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++) + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) { q = (cl_long *)out[k]; // If we aren't getting the correctly rounded result diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index d6d5c8eb..6c7c8c05 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -485,7 +485,8 @@ cl_int Test(cl_uint job_id, cl_uint 
thread_id, void *data) goto exit; } - for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++) + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) { q = out[k]; // If we aren't getting the correctly rounded result diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 1978c185..7f3521c6 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -304,7 +304,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } - for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++) + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) { q = out[k]; // If we aren't getting the correctly rounded result diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index ece5e9b6..0cd54de4 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -309,8 +309,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } - for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; - k++) + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) { q = out[k]; // If we aren't getting the correctly rounded result diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 6691f462..1a6e0c4e 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -18,6 +18,7 @@ #include "sleep.h" #include "utility.h" +#include #include #include #include @@ -1239,7 +1240,7 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference) // The unbiased exponent of the ulp unit place int ulp_exp = - DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1); + 
DBL_MANT_DIG - 1 - std::max(ilogbl(reference), DBL_MIN_EXP - 1); // Scale the exponent of the error float result = (float)scalbnl(testVal - reference, ulp_exp); @@ -1255,7 +1256,7 @@ float Bruteforce_Ulp_Error_Double(double test, long double reference) // reference is a normal power of two or a zero // The unbiased exponent of the ulp unit place int ulp_exp = - DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1); + DBL_MANT_DIG - 1 - std::max(ilogbl(reference) - 1, DBL_MIN_EXP - 1); // allow correctly rounded results to pass through unmolested. (We might add // error to it below.) There is something of a performance optimization here diff --git a/test_conformance/profiling/execute.cpp b/test_conformance/profiling/execute.cpp index edfc043c..0541bfa5 100644 --- a/test_conformance/profiling/execute.cpp +++ b/test_conformance/profiling/execute.cpp @@ -21,6 +21,8 @@ #include #include +#include + #include "procs.h" #include "harness/testHarness.h" #include "harness/errorHelpers.h" @@ -29,12 +31,6 @@ typedef unsigned char uchar; #endif -#undef MIN -#define MIN(x,y) ( (x) < (y) ? (x) : (y) ) - -#undef MAX -#define MAX(x,y) ( (x) > (y) ? 
(x) : (y) ) - //#define CREATE_OUTPUT 1 extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize ); @@ -73,8 +69,8 @@ static const char *image_filter_src = static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb ) { // clamp the coords - int x0 = MIN( MAX( x, 0 ), w - 1 ); - int y0 = MIN( MAX( y, 0 ), h - 1 ); + int x0 = std::min(std::max(x, 0), w - 1); + int y0 = std::min(std::max(y, 0), h - 1); // get tine index int indx = ( y0 * w + x0 ) * nChannels; diff --git a/test_conformance/workgroups/test_wg_broadcast.cpp b/test_conformance/workgroups/test_wg_broadcast.cpp index 35559476..29380211 100644 --- a/test_conformance/workgroups/test_wg_broadcast.cpp +++ b/test_conformance/workgroups/test_wg_broadcast.cpp @@ -20,6 +20,8 @@ #include #include +#include + #include "procs.h" @@ -310,7 +312,7 @@ test_work_group_broadcast_2D(cl_device_id device, cl_context context, cl_command localsize[0] = localsize[1] = 1; } - num_workgroups = MAX(n_elems/wg_size[0], 16); + num_workgroups = std::max(n_elems / wg_size[0], (size_t)16); globalsize[0] = num_workgroups * localsize[0]; globalsize[1] = num_workgroups * localsize[1]; num_elements = globalsize[0] * globalsize[1]; @@ -437,7 +439,7 @@ test_work_group_broadcast_3D(cl_device_id device, cl_context context, cl_command localsize[0] = localsize[1] = localsize[2] = 1; } - num_workgroups = MAX(n_elems/wg_size[0], 8); + num_workgroups = std::max(n_elems / wg_size[0], (size_t)8); globalsize[0] = num_workgroups * localsize[0]; globalsize[1] = num_workgroups * localsize[1]; globalsize[2] = num_workgroups * localsize[2]; diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp index 12338b68..644b3ccf 100644 --- a/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp +++ b/test_conformance/workgroups/test_wg_scan_exclusive_max.cpp @@ -20,8 +20,9 @@ #include #include -#include "procs.h" +#include +#include 
"procs.h" const char *wg_scan_exclusive_max_kernel_code_int = "__kernel void test_wg_scan_exclusive_max_int(global int *input, global int *output)\n" @@ -79,7 +80,7 @@ verify_wg_scan_exclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_si log_info("work_group_scan_exclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } @@ -103,7 +104,7 @@ verify_wg_scan_exclusive_max_uint(unsigned int *inptr, unsigned int *outptr, siz log_info("work_group_scan_exclusive_max int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } @@ -127,7 +128,7 @@ verify_wg_scan_exclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, siz log_info("work_group_scan_exclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } @@ -151,7 +152,7 @@ verify_wg_scan_exclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, log_info("work_group_scan_exclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; } - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); } } diff --git a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp index f4e6bf97..3c6dfc87 100644 --- a/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp +++ b/test_conformance/workgroups/test_wg_scan_exclusive_min.cpp @@ -20,8 +20,9 @@ #include #include -#include "procs.h" +#include +#include "procs.h" const char *wg_scan_exclusive_min_kernel_code_int = "__kernel void test_wg_scan_exclusive_min_int(global int *input, global int *output)\n" @@ -80,7 +81,7 @@ verify_wg_scan_exclusive_min_int(int 
*inptr, int *outptr, size_t n, size_t wg_si log_info("work_group_scan_exclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } @@ -104,7 +105,7 @@ verify_wg_scan_exclusive_min_uint(unsigned int *inptr, unsigned int *outptr, siz log_info("work_group_scan_exclusive_min int: Error at %u: expected = %u, got = %u\n", j+i, min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } @@ -128,7 +129,7 @@ verify_wg_scan_exclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, siz log_info("work_group_scan_exclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } @@ -152,7 +153,7 @@ verify_wg_scan_exclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, log_info("work_group_scan_exclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; } - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); } } diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp index 44ebf805..2a2e230e 100644 --- a/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp +++ b/test_conformance/workgroups/test_wg_scan_inclusive_max.cpp @@ -20,6 +20,8 @@ #include #include +#include + #include "procs.h" @@ -75,7 +77,7 @@ verify_wg_scan_inclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_si m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; @@ -99,7 +101,7 @@ verify_wg_scan_inclusive_max_uint(unsigned 
int *inptr, unsigned int *outptr, siz m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max int: Error at %lu: expected = %u, got = %u\n", (unsigned long)(j+i), max_, outptr[j+i]); return -1; @@ -123,7 +125,7 @@ verify_wg_scan_inclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, siz m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; @@ -147,7 +149,7 @@ verify_wg_scan_inclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, m = wg_size; for (i = 0; i < m; ++i) { - max_ = MAX(inptr[j+i], max_); + max_ = std::max(inptr[j + i], max_); if (outptr[j+i] != max_) { log_info("work_group_scan_inclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); return -1; diff --git a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp b/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp index f2f05788..adbdad56 100644 --- a/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp +++ b/test_conformance/workgroups/test_wg_scan_inclusive_min.cpp @@ -20,6 +20,8 @@ #include #include +#include + #include "procs.h" @@ -75,7 +77,7 @@ verify_wg_scan_inclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_si m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; @@ -99,7 +101,7 @@ verify_wg_scan_inclusive_min_uint(unsigned int *inptr, unsigned int *outptr, siz m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = 
std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; @@ -123,7 +125,7 @@ verify_wg_scan_inclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, siz m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; @@ -147,7 +149,7 @@ verify_wg_scan_inclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, m = wg_size; for (i = 0; i < m; ++i) { - min_ = MIN(inptr[j+i], min_); + min_ = std::min(inptr[j + i], min_); if (outptr[j+i] != min_) { log_info("work_group_scan_inclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); return -1; -- cgit v1.2.3 From ddca0f802bee72ff9ea90b1dab28dddc51ef9a20 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 28 Sep 2021 11:19:17 -0600 Subject: gles: Fix double frees. (#1323) * gles: Fix double frees. Remove a few explicit frees in the redirect_buffers test which are already handled by a wrapper. * gles: Fix double frees A recent update to the object wrapper classes (#1268) changed the behavior of assigning to a wrapper, whereby the wrapped object is now released upon assignment. A couple of tests were manually calling clReleaseMemObject and then assigning `nullptr` to the wrapper, resulting in the wrapper calling clReleaseMemObject on an object that had already been destroyed. 
Co-authored-by: spauls --- test_conformance/gles/test_buffers.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test_conformance/gles/test_buffers.cpp b/test_conformance/gles/test_buffers.cpp index a2d67322..73711261 100644 --- a/test_conformance/gles/test_buffers.cpp +++ b/test_conformance/gles/test_buffers.cpp @@ -205,10 +205,10 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType if (validate_only) { int result = (CheckGLObjectInfo(streams[0], CL_GL_OBJECT_BUFFER, (GLuint)inGLBuffer, (GLenum)0, 0) | CheckGLObjectInfo(streams[2], CL_GL_OBJECT_BUFFER, (GLuint)outGLBuffer, (GLenum)0, 0) ); - for(i=0;i<3;i++) + + for (i = 0; i < 3; i++) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; + streams[i].reset(); } glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; @@ -285,10 +285,9 @@ int test_buffer_kernel(cl_context context, cl_command_queue queue, ExplicitType clP += get_explicit_type_size( vecType ); } - for(i=0;i<3;i++) + for (i = 0; i < 3; i++) { - clReleaseMemObject(streams[i]); - streams[i] = NULL; + streams[i].reset(); } glDeleteBuffers(1, &inGLBuffer); inGLBuffer = 0; -- cgit v1.2.3 From 4fb5deeec1e38bfa796b1cc0e93294ba1983b473 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 28 Sep 2021 11:19:40 -0600 Subject: api: Enable cl_khr_fp16 when using half types in kernel (#1327) --- test_conformance/api/test_kernel_arg_info.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp index 8073e0de..dddb4a23 100644 --- a/test_conformance/api/test_kernel_arg_info.cpp +++ b/test_conformance/api/test_kernel_arg_info.cpp @@ -167,7 +167,8 @@ static std::string generate_argument(const KernelArgInfo& kernel_arg) /* This function generates a kernel source and allows for multiple arguments to * be passed in and subsequently queried. 
*/ static std::string generate_kernel(const std::vector& all_args, - const bool supports_3d_image_writes = false) + const bool supports_3d_image_writes = false, + const bool kernel_uses_half_type = false) { std::string ret; @@ -175,6 +176,10 @@ static std::string generate_kernel(const std::vector& all_args, { ret += "#pragma OPENCL EXTENSION cl_khr_3d_image_writes: enable\n"; } + if (kernel_uses_half_type) + { + ret += "#pragma OPENCL EXTENSION cl_khr_fp16 : enable\n"; + } ret += "kernel void get_kernel_arg_info(\n"; for (int i = 0; i < all_args.size(); ++i) { @@ -673,8 +678,8 @@ static int run_scalar_vector_tests(cl_context context, cl_device_id deviceID) if (param_size + total_param_size >= max_param_size || all_args.size() == MAX_NUMBER_OF_KERNEL_ARGS) { - const std::string kernel_src = - generate_kernel(all_args); + const std::string kernel_src = generate_kernel( + all_args, false, device_supports_half(deviceID)); failed_tests += compare_kernel_with_expected( context, deviceID, kernel_src.c_str(), expected_args); @@ -696,7 +701,8 @@ static int run_scalar_vector_tests(cl_context context, cl_device_id deviceID) } } } - const std::string kernel_src = generate_kernel(all_args); + const std::string kernel_src = + generate_kernel(all_args, false, device_supports_half(deviceID)); failed_tests += compare_kernel_with_expected( context, deviceID, kernel_src.c_str(), expected_args); return failed_tests; -- cgit v1.2.3 From 2b770c4f348d9ad71a22c3b949a1cffe32e9d1f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Wed, 29 Sep 2021 12:38:42 +0100 Subject: Update cl_khr_integer_dot_product tests for v2 (#1317) * Update cl_khr_integer_dot_product tests for v2 Signed-off-by: Kevin Petit Signed-off-by: Marco Cattani Change-Id: I97dbd820f1f32f6b377e47d0bf638f36bb91930a * only query acceleration properties with v2+ Change-Id: I3f13a0cba7f1f686365b10adf81690e089cd3d74 --- test_common/harness/deviceInfo.cpp | 34 +++++++++++ test_common/harness/deviceInfo.h | 5 ++ 
.../integer_ops/test_integer_dot_product.cpp | 67 ++++++++++++++++++++++ 3 files changed, 106 insertions(+) diff --git a/test_common/harness/deviceInfo.cpp b/test_common/harness/deviceInfo.cpp index 287a1423..97ab8c85 100644 --- a/test_common/harness/deviceInfo.cpp +++ b/test_common/harness/deviceInfo.cpp @@ -63,6 +63,40 @@ int is_extension_available(cl_device_id device, const char *extensionName) return false; } +cl_version get_extension_version(cl_device_id device, const char *extensionName) +{ + cl_int err; + size_t size; + + err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS_WITH_VERSION, 0, nullptr, + &size); + if (err != CL_SUCCESS) + { + throw std::runtime_error("clGetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_" + "VERSION) failed to return size\n"); + } + + std::vector extensions(size / sizeof(cl_name_version)); + err = clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS_WITH_VERSION, size, + extensions.data(), &size); + if (err != CL_SUCCESS) + { + throw std::runtime_error("clGetDeviceInfo(CL_DEVICE_EXTENSIONS_WITH_" + "VERSION) failed to return value\n"); + } + + for (auto &ext : extensions) + { + if (!strcmp(extensionName, ext.name)) + { + return ext.version; + } + } + + throw std::runtime_error("Extension " + std::string(extensionName) + + " not supported by device!"); +} + /* Returns a string containing the supported extensions list for a device. */ std::string get_device_extensions_string(cl_device_id device) { diff --git a/test_common/harness/deviceInfo.h b/test_common/harness/deviceInfo.h index f8c55805..912dd198 100644 --- a/test_common/harness/deviceInfo.h +++ b/test_common/harness/deviceInfo.h @@ -31,6 +31,11 @@ std::string get_device_info_string(cl_device_id device, /* Determines if an extension is supported by a device. */ int is_extension_available(cl_device_id device, const char *extensionName); +/* Returns the version of the extension the device supports or throws an + * exception if the extension is not supported by the device. 
*/ +cl_version get_extension_version(cl_device_id device, + const char *extensionName); + /* Returns a string containing the supported extensions list for a device. */ std::string get_device_extensions_string(cl_device_id device); diff --git a/test_conformance/integer_ops/test_integer_dot_product.cpp b/test_conformance/integer_ops/test_integer_dot_product.cpp index be25b320..602d59b6 100644 --- a/test_conformance/integer_ops/test_integer_dot_product.cpp +++ b/test_conformance/integer_ops/test_integer_dot_product.cpp @@ -336,6 +336,21 @@ int test_integer_dot_product(cl_device_id deviceID, cl_context context, return TEST_SKIPPED_ITSELF; } + Version deviceVersion = get_device_cl_version(deviceID); + cl_version extensionVersion; + + if ((deviceVersion >= Version(3, 0)) + || is_extension_available(deviceID, "cl_khr_extended_versioning")) + { + extensionVersion = + get_extension_version(deviceID, "cl_khr_integer_dot_product"); + } + else + { + // Assume 1.0.0 is supported if the version can't be queried + extensionVersion = CL_MAKE_VERSION(1, 0, 0); + } + cl_int error = CL_SUCCESS; int result = TEST_PASS; @@ -346,12 +361,63 @@ int test_integer_dot_product(cl_device_id deviceID, cl_context context, test_error( error, "Unable to query CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR"); + + // Check that the required capabilities are reported test_assert_error( dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR, "When cl_khr_integer_dot_product is supported " "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR must be " "supported"); + if (extensionVersion >= CL_MAKE_VERSION(2, 0, 0)) + { + test_assert_error( + dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR, + "When cl_khr_integer_dot_product is supported with version >= 2.0.0" + "CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR must be " + "supported"); + } + + // Check that acceleration properties can be queried + if (extensionVersion >= CL_MAKE_VERSION(2, 0, 0)) + { + size_t size_ret; + error = 
clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, 0, + nullptr, &size_ret); + test_error( + error, + "Unable to query size of data returned by " + "CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR"); + + cl_device_integer_dot_product_acceleration_properties_khr + accelerationProperties; + error = clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, + sizeof(accelerationProperties), &accelerationProperties, nullptr); + test_error(error, "Unable to query 8-bit acceleration properties"); + + error = clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR, + 0, nullptr, &size_ret); + test_error( + error, + "Unable to query size of data returned by " + "CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_" + "PACKED_KHR"); + + error = clGetDeviceInfo( + deviceID, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR, + sizeof(accelerationProperties), &accelerationProperties, nullptr); + test_error(error, + "Unable to query 4x8-bit packed acceleration properties"); + } + + // Report when unknown capabilities are found if (dotCaps & ~(CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR | CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR)) @@ -359,6 +425,7 @@ int test_integer_dot_product(cl_device_id deviceID, cl_context context, log_info("NOTE: found an unknown / untested capability!\n"); } + // Test built-in functions if (dotCaps & CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR) { result |= test_vectype(deviceID, context, queue, -- cgit v1.2.3 From 903f1bf65dfe15956295eb9379f5706568d858a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Thu, 30 Sep 2021 13:33:18 +0100 Subject: Report unsupported extended subgroup tests as skipped rather than passed (#1301) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Report unsupported extended 
subgroup tests as skipped rather than passed Also don't check the presence of extensions for each sub-test. Signed-off-by: Kévin Petit * address review comments --- test_conformance/subgroups/subhelpers.h | 17 +---------------- test_conformance/subgroups/test_subgroup_ballot.cpp | 10 +++++++--- .../subgroups/test_subgroup_clustered_reduce.cpp | 12 +++++++----- .../subgroups/test_subgroup_extended_types.cpp | 12 +++++++----- .../subgroups/test_subgroup_non_uniform_arithmetic.cpp | 15 +++++++++------ .../subgroups/test_subgroup_non_uniform_vote.cpp | 13 +++++++------ test_conformance/subgroups/test_subgroup_shuffle.cpp | 10 +++++++--- .../subgroups/test_subgroup_shuffle_relative.cpp | 12 +++++++----- 8 files changed, 52 insertions(+), 49 deletions(-) diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 9232cded..0d497fb3 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -33,10 +33,9 @@ extern cl_half_rounding_mode g_rounding_mode; struct WorkGroupParams { WorkGroupParams(size_t gws, size_t lws, - const std::vector &req_ext = {}, const std::vector &all_wim = {}) : global_workgroup_size(gws), local_workgroup_size(lws), - required_extensions(req_ext), all_work_item_masks(all_wim) + all_work_item_masks(all_wim) { subgroup_size = 0; work_items_mask = 0; @@ -49,7 +48,6 @@ struct WorkGroupParams uint32_t work_items_mask; int dynsc; bool use_core_subgroups; - std::vector required_extensions; std::vector all_work_item_masks; }; @@ -1297,19 +1295,6 @@ template struct test } } - for (std::string extension : test_params.required_extensions) - { - if (!is_extension_available(device, extension.c_str())) - { - log_info("The extension %s not supported on this device. 
SKIP " - "testing - kernel %s data type %s\n", - extension.c_str(), kname, TypeManager::name()); - return TEST_PASS; - } - kernel_sstr << "#pragma OPENCL EXTENSION " + extension - + ": enable\n"; - } - error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index f2e4060b..9a2da5d9 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -926,11 +926,15 @@ template int run_non_uniform_broadcast_for_type(RunTestForType rft) int test_subgroup_functions_ballot(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { "cl_khr_subgroup_ballot" }; + if (!is_extension_available(device, "cl_khr_subgroup_ballot")) + { + log_info("cl_khr_subgroup_ballot is not supported on this device, " + "skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } constexpr size_t global_work_size = 170; constexpr size_t local_work_size = 64; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions); + WorkGroupParams test_params(global_work_size, local_work_size); RunTestForType rft(device, context, queue, num_elements, test_params); // non uniform broadcast functions diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index 588e9cee..87507e37 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -305,13 +305,15 @@ int test_subgroup_functions_clustered_reduce(cl_device_id device, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { - "cl_khr_subgroup_clustered_reduce" - }; + if 
(!is_extension_available(device, "cl_khr_subgroup_clustered_reduce")) + { + log_info("cl_khr_subgroup_clustered_reduce is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions); + WorkGroupParams test_params(global_work_size, local_work_size); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_cluster_red_add_max_min_mul_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_extended_types.cpp b/test_conformance/subgroups/test_subgroup_extended_types.cpp index 98401b8e..b281f618 100644 --- a/test_conformance/subgroups/test_subgroup_extended_types.cpp +++ b/test_conformance/subgroups/test_subgroup_extended_types.cpp @@ -59,13 +59,15 @@ int test_subgroup_functions_extended_types(cl_device_id device, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { - "cl_khr_subgroup_extended_types" - }; + if (!is_extension_available(device, "cl_khr_subgroup_extended_types")) + { + log_info("cl_khr_subgroup_extended_types is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions); + WorkGroupParams test_params(global_work_size, local_work_size); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_broadcast_for_extended_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp index eb46ff09..6c44249e 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp @@ -434,17 
+434,20 @@ int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { - "cl_khr_subgroup_non_uniform_arithmetic" - }; + if (!is_extension_available(device, + "cl_khr_subgroup_non_uniform_arithmetic")) + { + log_info("cl_khr_subgroup_non_uniform_arithmetic is not supported on " + "this device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } std::vector masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555, 0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00, 0x00ffff00, 0x80000000, 0xaaaaaaaa }; constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions, masks); + WorkGroupParams test_params(global_work_size, local_work_size, masks); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_functions_add_mul_max_min_for_type(rft); @@ -470,4 +473,4 @@ int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device, error |= run_functions_logical_and_or_xor_for_type(rft); return error; -} \ No newline at end of file +} diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index 2b00b4dd..484e9b6b 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -272,17 +272,18 @@ int test_subgroup_functions_non_uniform_vote(cl_device_id device, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { - "cl_khr_subgroup_non_uniform_vote" - }; - + if (!is_extension_available(device, "cl_khr_subgroup_non_uniform_vote")) + { + log_info("cl_khr_subgroup_non_uniform_vote is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } std::vector masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555, 0x0f0ff0f0, 0x0f0f0f0f, 
0xff0000ff, 0xff00ff00, 0x00ffff00, 0x80000000 }; constexpr size_t global_work_size = 170; constexpr size_t local_work_size = 64; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions, masks); + WorkGroupParams test_params(global_work_size, local_work_size, masks); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_vote_all_equal_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_shuffle.cpp b/test_conformance/subgroups/test_subgroup_shuffle.cpp index 049f0982..37b27ced 100644 --- a/test_conformance/subgroups/test_subgroup_shuffle.cpp +++ b/test_conformance/subgroups/test_subgroup_shuffle.cpp @@ -55,11 +55,15 @@ template int run_shuffle_for_type(RunTestForType rft) int test_subgroup_functions_shuffle(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { - std::vector required_extensions{ "cl_khr_subgroup_shuffle" }; + if (!is_extension_available(device, "cl_khr_subgroup_shuffle")) + { + log_info("cl_khr_subgroup_shuffle is not supported on this device, " + "skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions); + WorkGroupParams test_params(global_work_size, local_work_size); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_shuffle_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp index 6000c970..11401e80 100644 --- a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp +++ b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp @@ -56,13 +56,15 @@ int test_subgroup_functions_shuffle_relative(cl_device_id device, cl_command_queue queue, int num_elements) { - std::vector required_extensions = { - 
"cl_khr_subgroup_shuffle_relative" - }; + if (!is_extension_available(device, "cl_khr_subgroup_shuffle_relative")) + { + log_info("cl_khr_subgroup_shuffle_relative is not supported on this " + "device, skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, - required_extensions); + WorkGroupParams test_params(global_work_size, local_work_size); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_shuffle_relative_for_type(rft); -- cgit v1.2.3 From 92844bead1afdf75b56085c2cda34be27458a582 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Fri, 1 Oct 2021 12:28:37 +0200 Subject: Extended subgroups - use 128bit masks (#1215) * Extended subgroups - use 128bit masks * Refactoring to avoid kernels code duplication * unification kernel names as test_ prefix +subgroups function name * use string literals that improve readability * use kernel templates that limit code duplication * WorkGroupParams allows define default kernel - kernel template for multiple functions * WorkGroupParams allows define kernel for specific one subgroup function Co-authored-by: Stuart Brady --- .../subgroups/subgroup_common_kernels.cpp | 104 +---- .../subgroups/subgroup_common_kernels.h | 12 +- .../subgroups/subgroup_common_templates.h | 98 +++-- test_conformance/subgroups/subhelpers.h | 181 ++++++++- test_conformance/subgroups/test_subgroup.cpp | 47 +-- .../subgroups/test_subgroup_ballot.cpp | 425 ++++++++------------- .../subgroups/test_subgroup_clustered_reduce.cpp | 176 ++------- .../subgroups/test_subgroup_extended_types.cpp | 44 ++- .../test_subgroup_non_uniform_arithmetic.cpp | 409 +++----------------- .../subgroups/test_subgroup_non_uniform_vote.cpp | 93 ++--- .../subgroups/test_subgroup_shuffle.cpp | 29 +- .../subgroups/test_subgroup_shuffle_relative.cpp | 28 +- 12 files changed, 592 insertions(+), 1054 
deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_kernels.cpp b/test_conformance/subgroups/subgroup_common_kernels.cpp index f8b24450..33a51637 100644 --- a/test_conformance/subgroups/subgroup_common_kernels.cpp +++ b/test_conformance/subgroups/subgroup_common_kernels.cpp @@ -15,92 +15,20 @@ // #include "subgroup_common_kernels.h" -const char* bcast_source = - "__kernel void test_bcast(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint which_sub_group_local_id = xy[gid].z;\n" - " out[gid] = sub_group_broadcast(x, which_sub_group_local_id);\n" - "}\n"; - -const char* redadd_source = "__kernel void test_redadd(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_reduce_add(in[gid]);\n" - "}\n"; - -const char* redmax_source = "__kernel void test_redmax(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_reduce_max(in[gid]);\n" - "}\n"; - -const char* redmin_source = "__kernel void test_redmin(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_reduce_min(in[gid]);\n" - "}\n"; - -const char* scinadd_source = - "__kernel void test_scinadd(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_inclusive_add(in[gid]);\n" - "}\n"; - -const char* scinmax_source = - "__kernel void test_scinmax(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_inclusive_max(in[gid]);\n" - "}\n"; - -const char* scinmin_source = 
- "__kernel void test_scinmin(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_inclusive_min(in[gid]);\n" - "}\n"; - -const char* scexadd_source = - "__kernel void test_scexadd(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_exclusive_add(in[gid]);\n" - "}\n"; - -const char* scexmax_source = - "__kernel void test_scexmax(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_exclusive_max(in[gid]);\n" - "}\n"; - -const char* scexmin_source = - "__kernel void test_scexmin(const __global Type *in, __global int4 *xy, " - "__global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_scan_exclusive_min(in[gid]);\n" - "}\n"; +std::string sub_group_reduction_scan_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + out[gid] = %s(in[gid]); + } +)"; + +std::string sub_group_generic_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + out[gid] = %s(x, xy[gid].z); + } +)"; \ No newline at end of file diff --git a/test_conformance/subgroups/subgroup_common_kernels.h b/test_conformance/subgroups/subgroup_common_kernels.h index 8ae97d9a..bf2210ef 100644 --- a/test_conformance/subgroups/subgroup_common_kernels.h +++ b/test_conformance/subgroups/subgroup_common_kernels.h @@ -18,15 +18,7 @@ #include "subhelpers.h" -extern const char* bcast_source; -extern const char* redadd_source; -extern const char* redmax_source; -extern const char* redmin_source; -extern const char* scinadd_source; 
-extern const char* scinmax_source; -extern const char* scinmin_source; -extern const char* scexadd_source; -extern const char* scexmax_source; -extern const char* scexmin_source; +extern std::string sub_group_reduction_scan_source; +extern std::string sub_group_generic_source; #endif diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 4333e95b..5c5f9560 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -17,13 +17,10 @@ #define SUBGROUPCOMMONTEMPLATES_H #include "typeWrappers.h" -#include #include "CL/cl_half.h" #include "subhelpers.h" - #include -typedef std::bitset<128> bs128; static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id, const std::string &mask_type, cl_uint max_sub_group_size) @@ -577,16 +574,21 @@ template struct SCEX_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; ng = ng / nw; std::string func_name; - work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive" - : func_name = "sub_group_scan_exclusive"; + test_params.work_items_mask.any() + ? 
func_name = "sub_group_non_uniform_scan_exclusive" + : func_name = "sub_group_scan_exclusive"; log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); log_info(" test params: global size = %d local size = %d subgroups " - "size = %d work item mask = 0x%x \n", - test_params.global_workgroup_size, nw, ns, work_items_mask); + "size = %d \n", + test_params.global_workgroup_size, nw, ns); + if (test_params.work_items_mask.any()) + { + log_info(" work items mask: %s\n", + test_params.work_items_mask.to_string().c_str()); + } genrand(x, t, m, ns, nw, ng); } @@ -597,18 +599,22 @@ template struct SCEX_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; + bs128 work_items_mask = test_params.work_items_mask; int nj = (nw + ns - 1) / ns; Ty tr, rr; ng = ng / nw; std::string func_name; - work_items_mask ? func_name = "sub_group_non_uniform_scan_exclusive" - : func_name = "sub_group_scan_exclusive"; + test_params.work_items_mask.any() + ? func_name = "sub_group_non_uniform_scan_exclusive" + : func_name = "sub_group_scan_exclusive"; + - uint32_t use_work_items_mask; // for uniform case take into consideration all workitems - use_work_items_mask = !work_items_mask ? 
0xFFFFFFFF : work_items_mask; + if (!work_items_mask.any()) + { + work_items_mask.set(); + } for (k = 0; k < ng; ++k) { // for each work_group // Map to array indexed to array indexed by local ID and sub group @@ -624,8 +630,7 @@ template struct SCEX_NU std::set active_work_items; for (i = 0; i < n; ++i) { - uint32_t check_work_item = 1 << (i % 32); - if (use_work_items_mask & check_work_item) + if (work_items_mask.test(i)) { active_work_items.insert(i); } @@ -688,18 +693,23 @@ template struct SCIN_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; ng = ng / nw; std::string func_name; - work_items_mask ? func_name = "sub_group_non_uniform_scan_inclusive" - : func_name = "sub_group_scan_inclusive"; + test_params.work_items_mask.any() + ? func_name = "sub_group_non_uniform_scan_inclusive" + : func_name = "sub_group_scan_inclusive"; genrand(x, t, m, ns, nw, ng); log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); log_info(" test params: global size = %d local size = %d subgroups " - "size = %d work item mask = 0x%x \n", - test_params.global_workgroup_size, nw, ns, work_items_mask); + "size = %d \n", + test_params.global_workgroup_size, nw, ns); + if (test_params.work_items_mask.any()) + { + log_info(" work items mask: %s\n", + test_params.work_items_mask.to_string().c_str()); + } } static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -709,18 +719,22 @@ template struct SCIN_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; + bs128 work_items_mask = test_params.work_items_mask; + int nj = (nw + ns - 1) / ns; Ty tr, rr; ng = ng / nw; std::string func_name; - work_items_mask ? 
func_name = "sub_group_non_uniform_scan_inclusive" - : func_name = "sub_group_scan_inclusive"; + work_items_mask.any() + ? func_name = "sub_group_non_uniform_scan_inclusive" + : func_name = "sub_group_scan_inclusive"; - uint32_t use_work_items_mask; // for uniform case take into consideration all workitems - use_work_items_mask = !work_items_mask ? 0xFFFFFFFF : work_items_mask; + if (!work_items_mask.any()) + { + work_items_mask.set(); + } // std::bitset<32> mask32(use_work_items_mask); // for (int k) mask32.count(); for (k = 0; k < ng; ++k) @@ -740,8 +754,7 @@ template struct SCIN_NU for (i = 0; i < n; ++i) { - uint32_t check_work_item = 1 << (i % 32); - if (use_work_items_mask & check_work_item) + if (work_items_mask.test(i)) { if (catch_frist_active == -1) { @@ -807,17 +820,22 @@ template struct RED_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; ng = ng / nw; std::string func_name; - work_items_mask ? func_name = "sub_group_non_uniform_reduce" - : func_name = "sub_group_reduce"; + test_params.work_items_mask.any() + ? 
func_name = "sub_group_non_uniform_reduce" + : func_name = "sub_group_reduce"; log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); log_info(" test params: global size = %d local size = %d subgroups " - "size = %d work item mask = 0x%x \n", - test_params.global_workgroup_size, nw, ns, work_items_mask); + "size = %d \n", + test_params.global_workgroup_size, nw, ns); + if (test_params.work_items_mask.any()) + { + log_info(" work items mask: %s\n", + test_params.work_items_mask.to_string().c_str()); + } genrand(x, t, m, ns, nw, ng); } @@ -828,14 +846,14 @@ template struct RED_NU int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; + bs128 work_items_mask = test_params.work_items_mask; int nj = (nw + ns - 1) / ns; ng = ng / nw; Ty tr, rr; std::string func_name; - work_items_mask ? func_name = "sub_group_non_uniform_reduce" - : func_name = "sub_group_reduce"; + work_items_mask.any() ? func_name = "sub_group_non_uniform_reduce" + : func_name = "sub_group_reduce"; for (k = 0; k < ng; ++k) { @@ -847,9 +865,10 @@ template struct RED_NU my[j] = y[j]; } - uint32_t use_work_items_mask; - use_work_items_mask = - !work_items_mask ? 
0xFFFFFFFF : work_items_mask; + if (!work_items_mask.any()) + { + work_items_mask.set(); + } for (j = 0; j < nj; ++j) { @@ -859,8 +878,7 @@ template struct RED_NU int catch_frist_active = -1; for (i = 0; i < n; ++i) { - uint32_t check_work_item = 1 << (i % 32); - if (use_work_items_mask & check_work_item) + if (work_items_mask.test(i)) { if (catch_frist_active == -1) { diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 0d497fb3..6d32928a 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -24,31 +24,172 @@ #include #include #include +#include +#include +#include #define NR_OF_ACTIVE_WORK_ITEMS 4 extern MTdata gMTdata; +typedef std::bitset<128> bs128; extern cl_half_rounding_mode g_rounding_mode; struct WorkGroupParams { WorkGroupParams(size_t gws, size_t lws, - const std::vector &all_wim = {}) + bool use_mask = false) : global_workgroup_size(gws), local_workgroup_size(lws), - all_work_item_masks(all_wim) + use_masks(use_mask) { subgroup_size = 0; work_items_mask = 0; use_core_subgroups = true; dynsc = 0; + load_masks(); } size_t global_workgroup_size; size_t local_workgroup_size; size_t subgroup_size; - uint32_t work_items_mask; + bs128 work_items_mask; int dynsc; bool use_core_subgroups; - std::vector all_work_item_masks; + std::vector all_work_item_masks; + bool use_masks; + void save_kernel_source(const std::string &source, std::string name = "") + { + if (name == "") + { + name = "default"; + } + if (kernel_function_name.find(name) != kernel_function_name.end()) + { + log_info("Kernel definition duplication. Source will be " + "overwritten for function name %s", + name.c_str()); + } + kernel_function_name[name] = source; + }; + // return specific defined kernel or default. 
+ std::string get_kernel_source(std::string name) + { + if (kernel_function_name.find(name) == kernel_function_name.end()) + { + return kernel_function_name["default"]; + } + return kernel_function_name[name]; + } + + +private: + std::map kernel_function_name; + void load_masks() + { + if (use_masks) + { + // 1 in string will be set 1, 0 will be set 0 + bs128 mask_0xf0f0f0f0("11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xf0f0f0f0); + // 1 in string will be set 0, 0 will be set 1 + bs128 mask_0x0f0f0f0f("11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000" + "11110000111100001111000011110000", + 128, '1', '0'); + all_work_item_masks.push_back(mask_0x0f0f0f0f); + bs128 mask_0x5555aaaa("10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x5555aaaa); + bs128 mask_0xaaaa5555("10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010" + "10101010101010101010101010101010", + 128, '1', '0'); + all_work_item_masks.push_back(mask_0xaaaa5555); + // 0x0f0ff0f0 + bs128 mask_0x0f0ff0f0("00001111000011111111000011110000" + "00001111000011111111000011110000" + "00001111000011111111000011110000" + "00001111000011111111000011110000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x0f0ff0f0); + // 0xff0000ff + bs128 mask_0xff0000ff("11111111000000000000000011111111" + "11111111000000000000000011111111" + "11111111000000000000000011111111" + "11111111000000000000000011111111", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xff0000ff); + // 0xff00ff00 + bs128 mask_0xff00ff00("11111111000000001111111100000000" + "11111111000000001111111100000000" + 
"11111111000000001111111100000000" + "11111111000000001111111100000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xff00ff00); + // 0x00ffff00 + bs128 mask_0x00ffff00("00000000111111111111111100000000" + "00000000111111111111111100000000" + "00000000111111111111111100000000" + "00000000111111111111111100000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x00ffff00); + // 0x80 1 workitem highest id for 8 subgroup size + bs128 mask_0x80808080("10000000100000001000000010000000" + "10000000100000001000000010000000" + "10000000100000001000000010000000" + "10000000100000001000000010000000", + 128, '0', '1'); + + all_work_item_masks.push_back(mask_0x80808080); + // 0x8000 1 workitem highest id for 16 subgroup size + bs128 mask_0x80008000("10000000000000001000000000000000" + "10000000000000001000000000000000" + "10000000000000001000000000000000" + "10000000000000001000000000000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x80008000); + // 0x80000000 1 workitem highest id for 32 subgroup size + bs128 mask_0x80000000("10000000000000000000000000000000" + "10000000000000000000000000000000" + "10000000000000000000000000000000" + "10000000000000000000000000000000", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0x80000000); + // 0x80000000 00000000 1 workitem highest id for 64 subgroup size + // 0x80000000 1 workitem highest id for 32 subgroup size + bs128 mask_0x8000000000000000("10000000000000000000000000000000" + "00000000000000000000000000000000" + "10000000000000000000000000000000" + "00000000000000000000000000000000", + 128, '0', '1'); + + all_work_item_masks.push_back(mask_0x8000000000000000); + // 0x80000000 00000000 00000000 00000000 1 workitem highest id for + // 128 subgroup size + bs128 mask_0x80000000000000000000000000000000( + "10000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000" + "00000000000000000000000000000000", + 128, '0', '1'); + 
all_work_item_masks.push_back( + mask_0x80000000000000000000000000000000); + + bs128 mask_0xffffffff("11111111111111111111111111111111" + "11111111111111111111111111111111" + "11111111111111111111111111111111" + "11111111111111111111111111111111", + 128, '0', '1'); + all_work_item_masks.push_back(mask_0xffffffff); + } + } }; enum class SubgroupsBroadcastOp @@ -1267,11 +1408,23 @@ template struct test std::vector mapout; mapout.resize(local); std::stringstream kernel_sstr; - if (test_params.work_items_mask != 0) + if (test_params.use_masks) { - kernel_sstr << "#define WORK_ITEMS_MASK "; - kernel_sstr << "0x" << std::hex << test_params.work_items_mask - << "\n"; + // Prepare uint4 type to store bitmask on kernel OpenCL C side + // To keep order the first character in string is the lowest bit + // there was a need to give such offset to bitset constructor + // (first highest offset = 96) + std::bitset<32> bits_1_32(test_params.work_items_mask.to_string(), + 96, 32); + std::bitset<32> bits_33_64(test_params.work_items_mask.to_string(), + 64, 32); + std::bitset<32> bits_65_96(test_params.work_items_mask.to_string(), + 32, 32); + std::bitset<32> bits_97_128(test_params.work_items_mask.to_string(), + 0, 32); + kernel_sstr << "global uint4 work_item_mask_vector = (uint4)(0b" + << bits_1_32 << ",0b" << bits_33_64 << ",0b" + << bits_65_96 << ",0b" << bits_97_128 << ");\n"; } @@ -1452,18 +1605,24 @@ struct RunTestForType num_elements_(num_elements), test_params_(test_params) {} template - int run_impl(const char *kernel_name, const char *source) + int run_impl(const std::string &function_name) { int error = TEST_PASS; + std::string source = + std::regex_replace(test_params_.get_kernel_source(function_name), + std::regex("\\%s"), function_name); + std::string kernel_name = "test_" + function_name; if (test_params_.all_work_item_masks.size() > 0) { error = test::mrun(device_, context_, queue_, num_elements_, - kernel_name, source, test_params_); + kernel_name.c_str(), 
source.c_str(), + test_params_); } else { error = test::run(device_, context_, queue_, num_elements_, - kernel_name, source, test_params_); + kernel_name.c_str(), source.c_str(), + test_params_); } return error; diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp index c0e49524..63bfc453 100644 --- a/test_conformance/subgroups/test_subgroup.cpp +++ b/test_conformance/subgroups/test_subgroup.cpp @@ -150,25 +150,25 @@ template int run_broadcast_scan_reduction_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_bcast", bcast_source); - error |= rft.run_impl>("test_redadd", - redadd_source); - error |= rft.run_impl>("test_redmax", - redmax_source); - error |= rft.run_impl>("test_redmin", - redmin_source); - error |= rft.run_impl>("test_scinadd", - scinadd_source); - error |= rft.run_impl>("test_scinmax", - scinmax_source); - error |= rft.run_impl>("test_scinmin", - scinmin_source); - error |= rft.run_impl>("test_scexadd", - scexadd_source); - error |= rft.run_impl>("test_scexmax", - scexmax_source); - error |= rft.run_impl>("test_scexmin", - scexmin_source); + "sub_group_broadcast"); + error |= + rft.run_impl>("sub_group_reduce_add"); + error |= + rft.run_impl>("sub_group_reduce_max"); + error |= + rft.run_impl>("sub_group_reduce_min"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_min"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_min"); return error; } @@ -181,11 +181,14 @@ int test_subgroup_functions(cl_device_id device, cl_context context, constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; WorkGroupParams test_params(global_work_size, local_work_size); + 
test_params.save_kernel_source(sub_group_reduction_scan_source); + test_params.save_kernel_source(sub_group_generic_source, + "sub_group_broadcast"); + RunTestForType rft(device, context, queue, num_elements, test_params); int error = - rft.run_impl>("test_any", any_source); - error |= - rft.run_impl>("test_all", all_source); + rft.run_impl>("sub_group_any"); + error |= rft.run_impl>("sub_group_all"); error |= run_broadcast_scan_reduction_for_type(rft); error |= run_broadcast_scan_reduction_for_type(rft); error |= run_broadcast_scan_reduction_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 9a2da5d9..2bd54e43 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -684,239 +684,127 @@ template struct SMASK } }; -static const char *bcast_non_uniform_source = - "__kernel void test_bcast_non_uniform(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {\n" - " out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z);\n" - " } else {\n" - " out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w);\n" - " }\n" - "}\n"; - -static const char *bcast_first_source = - "__kernel void test_bcast_first(const __global Type *in, __global int4 " - "*xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) {\n" - " out[gid] = sub_group_broadcast_first(x);\n" - " } else {\n" - " out[gid] = sub_group_broadcast_first(x);\n" - " }\n" - "}\n"; - -static const char *ballot_bit_count_source = - "__kernel void test_sub_group_ballot_bit_count(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x 
= in[gid];\n" - " uint4 value = (uint4)(0,0,0,0);\n" - " value = (uint4)(sub_group_ballot_bit_count(x),0,0,0);\n" - " out[gid] = value;\n" - "}\n"; - -static const char *ballot_inclusive_scan_source = - "__kernel void test_sub_group_ballot_inclusive_scan(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint4 value = (uint4)(0,0,0,0);\n" - " value = (uint4)(sub_group_ballot_inclusive_scan(x),0,0,0);\n" - " out[gid] = value;\n" - "}\n"; - -static const char *ballot_exclusive_scan_source = - "__kernel void test_sub_group_ballot_exclusive_scan(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint4 value = (uint4)(0,0,0,0);\n" - " value = (uint4)(sub_group_ballot_exclusive_scan(x),0,0,0);\n" - " out[gid] = value;\n" - "}\n"; - -static const char *ballot_find_lsb_source = - "__kernel void test_sub_group_ballot_find_lsb(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint4 value = (uint4)(0,0,0,0);\n" - " value = (uint4)(sub_group_ballot_find_lsb(x),0,0,0);\n" - " out[gid] = value;\n" - "}\n"; - -static const char *ballot_find_msb_source = - "__kernel void test_sub_group_ballot_find_msb(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint4 value = (uint4)(0,0,0,0);" - " value = (uint4)(sub_group_ballot_find_msb(x),0,0,0);" - " out[gid] = value ;" - "}\n"; - -static const char *get_subgroup_ge_mask_source = - "__kernel void test_get_sub_group_ge_mask(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].z = 
get_max_sub_group_size();\n" - " Type x = in[gid];\n" - " uint4 mask = get_sub_group_ge_mask();" - " out[gid] = mask;\n" - "}\n"; - -static const char *get_subgroup_gt_mask_source = - "__kernel void test_get_sub_group_gt_mask(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].z = get_max_sub_group_size();\n" - " Type x = in[gid];\n" - " uint4 mask = get_sub_group_gt_mask();" - " out[gid] = mask;\n" - "}\n"; - -static const char *get_subgroup_le_mask_source = - "__kernel void test_get_sub_group_le_mask(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].z = get_max_sub_group_size();\n" - " Type x = in[gid];\n" - " uint4 mask = get_sub_group_le_mask();" - " out[gid] = mask;\n" - "}\n"; - -static const char *get_subgroup_lt_mask_source = - "__kernel void test_get_sub_group_lt_mask(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].z = get_max_sub_group_size();\n" - " Type x = in[gid];\n" - " uint4 mask = get_sub_group_lt_mask();" - " out[gid] = mask;\n" - "}\n"; - -static const char *get_subgroup_eq_mask_source = - "__kernel void test_get_sub_group_eq_mask(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].z = get_max_sub_group_size();\n" - " Type x = in[gid];\n" - " uint4 mask = get_sub_group_eq_mask();" - " out[gid] = mask;\n" - "}\n"; - -static const char *ballot_source = - "__kernel void test_sub_group_ballot(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - "uint4 full_ballot = sub_group_ballot(1);\n" - "uint divergence_mask;\n" - "uint4 partial_ballot;\n" - "uint gid = get_global_id(0);" - "XY(xy,gid);\n" - "if (get_sub_group_local_id() & 1) {\n" - " 
divergence_mask = 0xaaaaaaaa;\n" - " partial_ballot = sub_group_ballot(1);\n" - "} else {\n" - " divergence_mask = 0x55555555;\n" - " partial_ballot = sub_group_ballot(1);\n" - "}\n" - " size_t lws = get_local_size(0);\n" - "uint4 masked_ballot = full_ballot;\n" - "masked_ballot.x &= divergence_mask;\n" - "masked_ballot.y &= divergence_mask;\n" - "masked_ballot.z &= divergence_mask;\n" - "masked_ballot.w &= divergence_mask;\n" - "out[gid] = all(masked_ballot == partial_ballot);\n" - - "} \n"; - -static const char *ballot_source_inverse = - "__kernel void test_sub_group_ballot_inverse(const __global " - "Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint4 value = (uint4)(10,0,0,0);\n" - " if (get_sub_group_local_id() & 1) {" - " uint4 partial_ballot_mask = " - "(uint4)(0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA);" - " if (sub_group_inverse_ballot(partial_ballot_mask)) {\n" - " value = (uint4)(1,0,0,1);\n" - " } else {\n" - " value = (uint4)(0,0,0,1);\n" - " }\n" - " } else {\n" - " uint4 partial_ballot_mask = " - "(uint4)(0x55555555,0x55555555,0x55555555,0x55555555);" - " if (sub_group_inverse_ballot(partial_ballot_mask)) {\n" - " value = (uint4)(1,0,0,2);\n" - " } else {\n" - " value = (uint4)(0,0,0,2);\n" - " }\n" - " }\n" - " out[gid] = value;\n" - "}\n"; +std::string sub_group_non_uniform_broadcast_source = R"( +__kernel void test_sub_group_non_uniform_broadcast(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) { + out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].z); + } else { + out[gid] = sub_group_non_uniform_broadcast(x, xy[gid].w); + } +} +)"; +std::string sub_group_broadcast_first_source = R"( +__kernel void test_sub_group_broadcast_first(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = 
get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + if (xy[gid].x < NR_OF_ACTIVE_WORK_ITEMS) { + out[gid] = sub_group_broadcast_first(x);; + } else { + out[gid] = sub_group_broadcast_first(x);; + } +} +)"; +std::string sub_group_ballot_bit_scan_find_source = R"( +__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + uint4 value = (uint4)(0,0,0,0); + value = (uint4)(%s(x),0,0,0); + out[gid] = value; +} +)"; +std::string sub_group_ballot_mask_source = R"( +__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + xy[gid].z = get_max_sub_group_size(); + Type x = in[gid]; + uint4 mask = %s(); + out[gid] = mask; +} +)"; +std::string sub_group_ballot_source = R"( +__kernel void test_sub_group_ballot(const __global Type *in, __global int4 *xy, __global Type *out) { + uint4 full_ballot = sub_group_ballot(1); + uint divergence_mask; + uint4 partial_ballot; + uint gid = get_global_id(0); + XY(xy,gid); + if (get_sub_group_local_id() & 1) { + divergence_mask = 0xaaaaaaaa; + partial_ballot = sub_group_ballot(1); + } else { + divergence_mask = 0x55555555; + partial_ballot = sub_group_ballot(1); + } + size_t lws = get_local_size(0); + uint4 masked_ballot = full_ballot; + masked_ballot.x &= divergence_mask; + masked_ballot.y &= divergence_mask; + masked_ballot.z &= divergence_mask; + masked_ballot.w &= divergence_mask; + out[gid] = all(masked_ballot == partial_ballot); -static const char *ballot_bit_extract_source = - "__kernel void test_sub_group_ballot_bit_extract(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " uint index = xy[gid].z;\n" - " uint4 value = (uint4)(10,0,0,0);\n" - " if (get_sub_group_local_id() & 1) {" - " if (sub_group_ballot_bit_extract(x, xy[gid].z)) {\n" - " value = 
(uint4)(1,0,0,1);\n" - " } else {\n" - " value = (uint4)(0,0,0,1);\n" - " }\n" - " } else {\n" - " if (sub_group_ballot_bit_extract(x, xy[gid].w)) {\n" - " value = (uint4)(1,0,0,2);\n" - " } else {\n" - " value = (uint4)(0,0,0,2);\n" - " }\n" - " }\n" - " out[gid] = value;\n" - "}\n"; +} +)"; +std::string sub_group_inverse_ballot_source = R"( +__kernel void test_sub_group_inverse_ballot(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + uint4 value = (uint4)(10,0,0,0); + if (get_sub_group_local_id() & 1) { + uint4 partial_ballot_mask = (uint4)(0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA,0xAAAAAAAA); + if (sub_group_inverse_ballot(partial_ballot_mask)) { + value = (uint4)(1,0,0,1); + } else { + value = (uint4)(0,0,0,1); + } + } else { + uint4 partial_ballot_mask = (uint4)(0x55555555,0x55555555,0x55555555,0x55555555); + if (sub_group_inverse_ballot(partial_ballot_mask)) { + value = (uint4)(1,0,0,2); + } else { + value = (uint4)(0,0,0,2); + } + } + out[gid] = value; +} +)"; +std::string sub_group_ballot_bit_extract_source = R"( + __kernel void test_sub_group_ballot_bit_extract(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + uint index = xy[gid].z; + uint4 value = (uint4)(10,0,0,0); + if (get_sub_group_local_id() & 1) { + if (sub_group_ballot_bit_extract(x, xy[gid].z)) { + value = (uint4)(1,0,0,1); + } else { + value = (uint4)(0,0,0,1); + } + } else { + if (sub_group_ballot_bit_extract(x, xy[gid].w)) { + value = (uint4)(1,0,0,2); + } else { + value = (uint4)(0,0,0,2); + } + } + out[gid] = value; +} +)"; template int run_non_uniform_broadcast_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_bcast_non_uniform", bcast_non_uniform_source); + "sub_group_non_uniform_broadcast"); return error; } @@ -932,9 +820,15 @@ int test_subgroup_functions_ballot(cl_device_id device, cl_context context, "skipping 
test.\n"); return TEST_SKIPPED_ITSELF; } + constexpr size_t global_work_size = 170; constexpr size_t local_work_size = 64; WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_ballot_mask_source); + test_params.save_kernel_source(sub_group_non_uniform_broadcast_source, + "sub_group_non_uniform_broadcast"); + test_params.save_kernel_source(sub_group_broadcast_first_source, + "sub_group_broadcast_first"); RunTestForType rft(device, context, queue, num_elements, test_params); // non uniform broadcast functions @@ -1018,76 +912,87 @@ int test_subgroup_functions_ballot(cl_device_id device, cl_context context, // broadcast first functions error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); error |= rft.run_impl< subgroups::cl_half, BC>( - "test_bcast_first", bcast_first_source); + "sub_group_broadcast_first"); // mask functions error |= rft.run_impl>( - "test_get_sub_group_eq_mask", get_subgroup_eq_mask_source); + "get_sub_group_eq_mask"); error |= rft.run_impl>( - "test_get_sub_group_ge_mask", 
get_subgroup_ge_mask_source); + "get_sub_group_ge_mask"); error |= rft.run_impl>( - "test_get_sub_group_gt_mask", get_subgroup_gt_mask_source); + "get_sub_group_gt_mask"); error |= rft.run_impl>( - "test_get_sub_group_le_mask", get_subgroup_le_mask_source); + "get_sub_group_le_mask"); error |= rft.run_impl>( - "test_get_sub_group_lt_mask", get_subgroup_lt_mask_source); + "get_sub_group_lt_mask"); // ballot functions - error |= rft.run_impl>("test_sub_group_ballot", - ballot_source); - error |= rft.run_impl>( - "test_sub_group_ballot_inverse", ballot_source_inverse); - error |= rft.run_impl< + WorkGroupParams test_params_ballot(global_work_size, local_work_size); + test_params_ballot.save_kernel_source( + sub_group_ballot_bit_scan_find_source); + test_params_ballot.save_kernel_source(sub_group_ballot_source, + "sub_group_ballot"); + test_params_ballot.save_kernel_source(sub_group_inverse_ballot_source, + "sub_group_inverse_ballot"); + test_params_ballot.save_kernel_source(sub_group_ballot_bit_extract_source, + "sub_group_ballot_bit_extract"); + RunTestForType rft_ballot(device, context, queue, num_elements, + test_params_ballot); + error |= rft_ballot.run_impl>("sub_group_ballot"); + error |= + rft_ballot.run_impl>( + "sub_group_inverse_ballot"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_BIT_EXTRACT>( - "test_sub_group_ballot_bit_extract", ballot_bit_extract_source); - error |= rft.run_impl< + "sub_group_ballot_bit_extract"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( - "test_sub_group_ballot_bit_count", ballot_bit_count_source); - error |= rft.run_impl< + "sub_group_ballot_bit_count"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( - "test_sub_group_ballot_inclusive_scan", ballot_inclusive_scan_source); - error |= rft.run_impl< + "sub_group_ballot_inclusive_scan"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( - "test_sub_group_ballot_exclusive_scan", ballot_exclusive_scan_source); - error |= 
rft.run_impl< + "sub_group_ballot_exclusive_scan"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( - "test_sub_group_ballot_find_lsb", ballot_find_lsb_source); - error |= rft.run_impl< + "sub_group_ballot_find_lsb"); + error |= rft_ballot.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( - "test_sub_group_ballot_find_msb", ballot_find_msb_source); + "sub_group_ballot_find_msb"); return error; } diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index 87507e37..11fcebc4 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -22,149 +22,17 @@ #define CLUSTER_SIZE_STR "4" namespace { -static const char *redadd_clustered_source = - "__kernel void test_redadd_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_add(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redmax_clustered_source = - "__kernel void test_redmax_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_max(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redmin_clustered_source = - "__kernel void test_redmin_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if 
(sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_min(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redmul_clustered_source = - "__kernel void test_redmul_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_mul(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redand_clustered_source = - "__kernel void test_redand_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_and(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redor_clustered_source = - "__kernel void test_redor_clustered(const __global Type *in, __global int4 " - "*xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_or(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redxor_clustered_source = - "__kernel void test_redxor_clustered(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_xor(in[gid], " 
CLUSTER_SIZE_STR ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = sub_group_clustered_reduce_xor(in[gid], " CLUSTER_SIZE_STR - ");\n" - "}\n"; - -static const char *redand_clustered_logical_source = - "__kernel void test_redand_clustered_logical(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR - ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = " - "sub_group_clustered_reduce_logical_and(in[gid], " CLUSTER_SIZE_STR ");\n" - "}\n"; - -static const char *redor_clustered_logical_source = - "__kernel void test_redor_clustered_logical(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if (sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR - ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = " - "sub_group_clustered_reduce_logical_or(in[gid], " CLUSTER_SIZE_STR ");\n" - "}\n"; - -static const char *redxor_clustered_logical_source = - "__kernel void test_redxor_clustered_logical(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " xy[gid].w = 0;\n" - " if ( sizeof(in[gid]) == " - "sizeof(sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR - ")))\n" - " {xy[gid].w = sizeof(in[gid]);}\n" - " out[gid] = " - "sub_group_clustered_reduce_logical_xor(in[gid], " CLUSTER_SIZE_STR ");\n" - "}\n"; - +std::string sub_group_clustered_reduce_source = R"( +__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + int gid = get_global_id(0); + XY(xy,gid); + xy[gid].w = 0; + if (sizeof(in[gid]) == sizeof(%s(in[gid], )" CLUSTER_SIZE_STR R"())) { + xy[gid].w = 
sizeof(in[gid]); + } + out[gid] = %s(in[gid], )" CLUSTER_SIZE_STR R"(); +} +)"; // DESCRIPTION: // Test for reduce cluster functions @@ -267,34 +135,34 @@ template int run_cluster_red_add_max_min_mul_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_redadd_clustered", redadd_clustered_source); + "sub_group_clustered_reduce_add"); error |= rft.run_impl>( - "test_redmax_clustered", redmax_clustered_source); + "sub_group_clustered_reduce_max"); error |= rft.run_impl>( - "test_redmin_clustered", redmin_clustered_source); + "sub_group_clustered_reduce_min"); error |= rft.run_impl>( - "test_redmul_clustered", redmul_clustered_source); + "sub_group_clustered_reduce_mul"); return error; } template int run_cluster_and_or_xor_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_redand_clustered", redand_clustered_source); + "sub_group_clustered_reduce_and"); error |= rft.run_impl>( - "test_redor_clustered", redor_clustered_source); + "sub_group_clustered_reduce_or"); error |= rft.run_impl>( - "test_redxor_clustered", redxor_clustered_source); + "sub_group_clustered_reduce_xor"); return error; } template int run_cluster_logical_and_or_xor_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_redand_clustered_logical", redand_clustered_logical_source); + "sub_group_clustered_reduce_logical_and"); error |= rft.run_impl>( - "test_redor_clustered_logical", redor_clustered_logical_source); + "sub_group_clustered_reduce_logical_or"); error |= rft.run_impl>( - "test_redxor_clustered_logical", redxor_clustered_logical_source); + "sub_group_clustered_reduce_logical_xor"); return error; } @@ -311,9 +179,11 @@ int test_subgroup_functions_clustered_reduce(cl_device_id device, "device, skipping test.\n"); return TEST_SKIPPED_ITSELF; } + constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_clustered_reduce_source); 
RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_cluster_red_add_max_min_mul_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_extended_types.cpp b/test_conformance/subgroups/test_subgroup_extended_types.cpp index b281f618..dbe24623 100644 --- a/test_conformance/subgroups/test_subgroup_extended_types.cpp +++ b/test_conformance/subgroups/test_subgroup_extended_types.cpp @@ -24,30 +24,30 @@ namespace { template int run_broadcast_for_extended_type(RunTestForType rft) { int error = rft.run_impl>( - "test_bcast", bcast_source); + "sub_group_broadcast"); return error; } template int run_scan_reduction_for_type(RunTestForType rft) { - int error = rft.run_impl>("test_redadd", - redadd_source); - error |= rft.run_impl>("test_redmax", - redmax_source); - error |= rft.run_impl>("test_redmin", - redmin_source); - error |= rft.run_impl>("test_scinadd", - scinadd_source); - error |= rft.run_impl>("test_scinmax", - scinmax_source); - error |= rft.run_impl>("test_scinmin", - scinmin_source); - error |= rft.run_impl>("test_scexadd", - scexadd_source); - error |= rft.run_impl>("test_scexmax", - scexmax_source); - error |= rft.run_impl>("test_scexmin", - scexmin_source); + int error = + rft.run_impl>("sub_group_reduce_add"); + error |= + rft.run_impl>("sub_group_reduce_max"); + error |= + rft.run_impl>("sub_group_reduce_min"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_inclusive_min"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_add"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_max"); + error |= rft.run_impl>( + "sub_group_scan_exclusive_min"); return error; } @@ -65,11 +65,15 @@ int test_subgroup_functions_extended_types(cl_device_id device, "device, skipping test.\n"); return TEST_SKIPPED_ITSELF; } + constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; 
WorkGroupParams test_params(global_work_size, local_work_size); - RunTestForType rft(device, context, queue, num_elements, test_params); + test_params.save_kernel_source(sub_group_reduction_scan_source); + test_params.save_kernel_source(sub_group_generic_source, + "sub_group_broadcast"); + RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp index 6c44249e..bb257bcd 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp @@ -17,336 +17,29 @@ #include "subhelpers.h" #include "harness/typeWrappers.h" #include "subgroup_common_templates.h" +#include namespace { -static const char *scinadd_non_uniform_source = R"( - __kernel void test_scinadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { +std::string sub_group_non_uniform_arithmetic_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { int gid = get_global_id(0); XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_add(in[gid]); - } - } -)"; - -static const char *scinmax_non_uniform_source = R"( - __kernel void test_scinmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_max(in[gid]); - } - } -)"; - -static const char *scinmin_non_uniform_source = R"( - __kernel void test_scinmin_non_uniform(const 
__global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_min(in[gid]); - } - } -)"; - -static const char *scinmul_non_uniform_source = R"( - __kernel void test_scinmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_mul(in[gid]); - } - } -)"; - -static const char *scinand_non_uniform_source = R"( - __kernel void test_scinand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_and(in[gid]); - } - } -)"; - -static const char *scinor_non_uniform_source = R"( - __kernel void test_scinor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_or(in[gid]); - } - } -)"; - -static const char *scinxor_non_uniform_source = R"( - __kernel void test_scinxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_xor(in[gid]); - } - } -)"; - -static const char *scinand_non_uniform_logical_source = R"( - __kernel void test_scinand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type 
*out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_logical_and(in[gid]); - } - } -)"; - -static const char *scinor_non_uniform_logical_source = R"( - __kernel void test_scinor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_logical_or(in[gid]); - } - } -)"; - -static const char *scinxor_non_uniform_logical_source = R"( - __kernel void test_scinxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_inclusive_logical_xor(in[gid]); - } - } -)"; - -static const char *scexadd_non_uniform_source = R"( - __kernel void test_scexadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_add(in[gid]); - } - } -)"; - -static const char *scexmax_non_uniform_source = R"( - __kernel void test_scexmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_max(in[gid]); - } - } -)"; - -static const char *scexmin_non_uniform_source = R"( - __kernel void test_scexmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - 
int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_min(in[gid]); - } - } -)"; - -static const char *scexmul_non_uniform_source = R"( - __kernel void test_scexmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_mul(in[gid]); - } - } -)"; - -static const char *scexand_non_uniform_source = R"( - __kernel void test_scexand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_and(in[gid]); - } - } -)"; - -static const char *scexor_non_uniform_source = R"( - __kernel void test_scexor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_or(in[gid]); - } - } -)"; - -static const char *scexxor_non_uniform_source = R"( - __kernel void test_scexxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_xor(in[gid]); - } - } -)"; - -static const char *scexand_non_uniform_logical_source = R"( - __kernel void test_scexand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int 
elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_logical_and(in[gid]); - } - } -)"; - -static const char *scexor_non_uniform_logical_source = R"( - __kernel void test_scexor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_logical_or(in[gid]); - } - } -)"; - -static const char *scexxor_non_uniform_logical_source = R"( - __kernel void test_scexxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_scan_exclusive_logical_xor(in[gid]); - } - } -)"; - -static const char *redadd_non_uniform_source = R"( - __kernel void test_redadd_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_add(in[gid]); - } - } -)"; - -static const char *redmax_non_uniform_source = R"( - __kernel void test_redmax_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_max(in[gid]); - } - } -)"; - -static const char *redmin_non_uniform_source = R"( - __kernel void test_redmin_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << 
(get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_min(in[gid]); - } - } -)"; - -static const char *redmul_non_uniform_source = R"( - __kernel void test_redmul_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_mul(in[gid]); - } - } -)"; - -static const char *redand_non_uniform_source = R"( - __kernel void test_redand_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_and(in[gid]); - } - } -)"; - -static const char *redor_non_uniform_source = R"( - __kernel void test_redor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_or(in[gid]); - } - } -)"; - -static const char *redxor_non_uniform_source = R"( - __kernel void test_redxor_non_uniform(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_xor(in[gid]); - } - } -)"; - -static const char *redand_non_uniform_logical_source = R"( - __kernel void test_redand_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = 
sub_group_non_uniform_reduce_logical_and(in[gid]); - } - } -)"; - -static const char *redor_non_uniform_logical_source = R"( - __kernel void test_redor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_logical_or(in[gid]); - } - } -)"; - -static const char *redxor_non_uniform_logical_source = R"( - __kernel void test_redxor_non_uniform_logical(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - int elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_reduce_logical_xor(in[gid]); - } + uint subgroup_local_id = get_sub_group_local_id(); + uint elect_work_item = 1 << (subgroup_local_id % 32); + uint work_item_mask; + if(subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.w; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.z; + } + if (elect_work_item & work_item_mask){ + out[gid] = %s(in[gid]); + } } )"; @@ -354,52 +47,52 @@ template int run_functions_add_mul_max_min_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_scinadd_non_uniform", scinadd_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_add"); error |= rft.run_impl>( - "test_scinmul_non_uniform", scinmul_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_mul"); error |= rft.run_impl>( - "test_scinmax_non_uniform", scinmax_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_max"); error |= rft.run_impl>( - "test_scinmin_non_uniform", scinmin_non_uniform_source); + 
"sub_group_non_uniform_scan_inclusive_min"); error |= rft.run_impl>( - "test_scexadd_non_uniform", scexadd_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_add"); error |= rft.run_impl>( - "test_scexmul_non_uniform", scexmul_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_mul"); error |= rft.run_impl>( - "test_scexmax_non_uniform", scexmax_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_max"); error |= rft.run_impl>( - "test_scexmin_non_uniform", scexmin_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_min"); error |= rft.run_impl>( - "test_redadd_non_uniform", redadd_non_uniform_source); + "sub_group_non_uniform_reduce_add"); error |= rft.run_impl>( - "test_redmul_non_uniform", redmul_non_uniform_source); + "sub_group_non_uniform_reduce_mul"); error |= rft.run_impl>( - "test_redmax_non_uniform", redmax_non_uniform_source); + "sub_group_non_uniform_reduce_max"); error |= rft.run_impl>( - "test_redmin_non_uniform", redmin_non_uniform_source); + "sub_group_non_uniform_reduce_min"); return error; } template int run_functions_and_or_xor_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_scinand_non_uniform", scinand_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_and"); error |= rft.run_impl>( - "test_scinor_non_uniform", scinor_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_or"); error |= rft.run_impl>( - "test_scinxor_non_uniform", scinxor_non_uniform_source); + "sub_group_non_uniform_scan_inclusive_xor"); error |= rft.run_impl>( - "test_scexand_non_uniform", scexand_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_and"); error |= rft.run_impl>( - "test_scexor_non_uniform", scexor_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_or"); error |= rft.run_impl>( - "test_scexxor_non_uniform", scexxor_non_uniform_source); + "sub_group_non_uniform_scan_exclusive_xor"); error |= rft.run_impl>( - "test_redand_non_uniform", redand_non_uniform_source); + 
"sub_group_non_uniform_reduce_and"); error |= rft.run_impl>( - "test_redor_non_uniform", redor_non_uniform_source); + "sub_group_non_uniform_reduce_or"); error |= rft.run_impl>( - "test_redxor_non_uniform", redxor_non_uniform_source); + "sub_group_non_uniform_reduce_xor"); return error; } @@ -407,23 +100,23 @@ template int run_functions_logical_and_or_xor_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_scinand_non_uniform_logical", scinand_non_uniform_logical_source); + "sub_group_non_uniform_scan_inclusive_logical_and"); error |= rft.run_impl>( - "test_scinor_non_uniform_logical", scinor_non_uniform_logical_source); + "sub_group_non_uniform_scan_inclusive_logical_or"); error |= rft.run_impl>( - "test_scinxor_non_uniform_logical", scinxor_non_uniform_logical_source); + "sub_group_non_uniform_scan_inclusive_logical_xor"); error |= rft.run_impl>( - "test_scexand_non_uniform_logical", scexand_non_uniform_logical_source); + "sub_group_non_uniform_scan_exclusive_logical_and"); error |= rft.run_impl>( - "test_scexor_non_uniform_logical", scexor_non_uniform_logical_source); + "sub_group_non_uniform_scan_exclusive_logical_or"); error |= rft.run_impl>( - "test_scexxor_non_uniform_logical", scexxor_non_uniform_logical_source); + "sub_group_non_uniform_scan_exclusive_logical_xor"); error |= rft.run_impl>( - "test_redand_non_uniform_logical", redand_non_uniform_logical_source); + "sub_group_non_uniform_reduce_logical_and"); error |= rft.run_impl>( - "test_redor_non_uniform_logical", redor_non_uniform_logical_source); + "sub_group_non_uniform_reduce_logical_or"); error |= rft.run_impl>( - "test_redxor_non_uniform_logical", redxor_non_uniform_logical_source); + "sub_group_non_uniform_reduce_logical_xor"); return error; } @@ -441,13 +134,11 @@ int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device, "this device, skipping test.\n"); return TEST_SKIPPED_ITSELF; } - std::vector masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555, - 0x0f0ff0f0, 
0x0f0f0f0f, 0xff0000ff, 0xff00ff00, - 0x00ffff00, 0x80000000, 0xaaaaaaaa }; constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, masks); + WorkGroupParams test_params(global_work_size, local_work_size, true); + test_params.save_kernel_source(sub_group_non_uniform_arithmetic_source); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_functions_add_mul_max_min_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index 484e9b6b..f956960b 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -28,7 +28,6 @@ template struct VOTE int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; int nj = (nw + ns - 1) / ns; int non_uniform_size = ng % nw; ng = ng / nw; @@ -40,9 +39,11 @@ template struct VOTE operation_names(operation)); log_info(" test params: global size = %d local size = %d subgroups " - "size = %d work item mask = 0x%x data type (%s)\n", - test_params.global_workgroup_size, nw, ns, work_items_mask, + "size = %d data type (%s)\n", + test_params.global_workgroup_size, nw, ns, TypeManager::name()); + log_info(" work items mask: %s\n", + test_params.work_items_mask.to_string().c_str()); if (non_uniform_size) { log_info(" non uniform work group size mode ON\n"); @@ -99,7 +100,6 @@ template struct VOTE int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; - uint32_t work_items_mask = test_params.work_items_mask; int nj = (nw + ns - 1) / ns; cl_int tr, rr; int non_uniform_size = ng % nw; @@ -141,8 +141,7 @@ template struct VOTE std::set active_work_items; for (i 
= 0; i < n; ++i) { - uint32_t check_work_item = 1 << (i % 32); - if (work_items_mask & check_work_item) + if (test_params.work_items_mask.test(i)) { active_work_items.insert(i); switch (operation) @@ -215,46 +214,47 @@ template struct VOTE return TEST_PASS; } }; -static const char *elect_source = R"( - __kernel void test_elect(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - uint elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_elect(); - } - } -)"; - -static const char *non_uniform_any_source = R"( - __kernel void test_non_uniform_any(const __global Type *in, __global int4 *xy, __global Type *out) { - int gid = get_global_id(0); - XY(xy,gid); - uint elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_any(in[gid]); - } - } -)"; -static const char *non_uniform_all_source = R"( - __kernel void test_non_uniform_all(const __global Type *in, __global int4 *xy, __global Type *out) { +std::string sub_group_elect_source = R"( + __kernel void test_sub_group_elect(const __global Type *in, __global int4 *xy, __global Type *out) { int gid = get_global_id(0); XY(xy,gid); - uint elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_all(in[gid]); - } + uint subgroup_local_id = get_sub_group_local_id(); + uint elect_work_item = 1 << (subgroup_local_id % 32); + uint work_item_mask; + if(subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.w; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.z; + } + if (elect_work_item & work_item_mask){ + out[gid] = sub_group_elect(); + } } )"; -static 
const char *non_uniform_all_equal_source = R"( - __kernel void test_non_uniform_all_equal(const __global Type *in, __global int4 *xy, __global Type *out) { +std::string sub_group_non_uniform_any_all_all_equal_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { int gid = get_global_id(0); XY(xy,gid); - uint elect_work_item = 1 << (get_sub_group_local_id() % 32); - if (elect_work_item & WORK_ITEMS_MASK){ - out[gid] = sub_group_non_uniform_all_equal(in[gid]); + uint subgroup_local_id = get_sub_group_local_id(); + uint elect_work_item = 1 << (subgroup_local_id % 32); + uint work_item_mask; + if(subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.w; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.z; + } + if (elect_work_item & work_item_mask){ + out[gid] = %s(in[gid]); } } )"; @@ -262,7 +262,7 @@ static const char *non_uniform_all_equal_source = R"( template int run_vote_all_equal_for_type(RunTestForType rft) { int error = rft.run_impl>( - "test_non_uniform_all_equal", non_uniform_all_equal_source); + "sub_group_non_uniform_all_equal"); return error; } } @@ -278,12 +278,13 @@ int test_subgroup_functions_non_uniform_vote(cl_device_id device, "device, skipping test.\n"); return TEST_SKIPPED_ITSELF; } - std::vector masks{ 0xffffffff, 0x55aaaa55, 0x5555aaaa, 0xaaaa5555, - 0x0f0ff0f0, 0x0f0f0f0f, 0xff0000ff, 0xff00ff00, - 0x00ffff00, 0x80000000 }; + constexpr size_t global_work_size = 170; constexpr size_t local_work_size = 64; - WorkGroupParams test_params(global_work_size, local_work_size, masks); + WorkGroupParams test_params(global_work_size, local_work_size, true); + test_params.save_kernel_source( + sub_group_non_uniform_any_all_all_equal_source); + test_params.save_kernel_source(sub_group_elect_source, 
"sub_group_elect"); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_vote_all_equal_for_type(rft); @@ -295,10 +296,10 @@ int test_subgroup_functions_non_uniform_vote(cl_device_id device, error |= run_vote_all_equal_for_type(rft); error |= rft.run_impl>( - "test_non_uniform_all", non_uniform_all_source); + "sub_group_non_uniform_all"); error |= rft.run_impl>( - "test_elect", elect_source); + "sub_group_elect"); error |= rft.run_impl>( - "test_non_uniform_any", non_uniform_any_source); + "sub_group_non_uniform_any"); return error; } diff --git a/test_conformance/subgroups/test_subgroup_shuffle.cpp b/test_conformance/subgroups/test_subgroup_shuffle.cpp index 37b27ced..56231cbf 100644 --- a/test_conformance/subgroups/test_subgroup_shuffle.cpp +++ b/test_conformance/subgroups/test_subgroup_shuffle.cpp @@ -15,38 +15,19 @@ // #include "procs.h" #include "subhelpers.h" +#include "subgroup_common_kernels.h" #include "subgroup_common_templates.h" #include "harness/typeWrappers.h" #include namespace { -static const char* shuffle_xor_source = - "__kernel void test_sub_group_shuffle_xor(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " out[gid] = sub_group_shuffle_xor(x, xy[gid].z);" - "}\n"; - -static const char* shuffle_source = - "__kernel void test_sub_group_shuffle(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " out[gid] = sub_group_shuffle(x, xy[gid].z);" - "}\n"; - template int run_shuffle_for_type(RunTestForType rft) { - int error = rft.run_impl>( - "test_sub_group_shuffle", shuffle_source); + int error = + rft.run_impl>("sub_group_shuffle"); error |= rft.run_impl>( - "test_sub_group_shuffle_xor", shuffle_xor_source); + "sub_group_shuffle_xor"); return error; } @@ -61,9 +42,11 @@ int 
test_subgroup_functions_shuffle(cl_device_id device, cl_context context, "skipping test.\n"); return TEST_SKIPPED_ITSELF; } + constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_generic_source); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_shuffle_for_type(rft); diff --git a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp index 11401e80..caa1dccc 100644 --- a/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp +++ b/test_conformance/subgroups/test_subgroup_shuffle_relative.cpp @@ -15,37 +15,19 @@ // #include "procs.h" #include "subhelpers.h" +#include "subgroup_common_kernels.h" #include "subgroup_common_templates.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" namespace { -static const char* shuffle_down_source = - "__kernel void test_sub_group_shuffle_down(const __global Type *in, " - "__global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " out[gid] = sub_group_shuffle_down(x, xy[gid].z);" - "}\n"; -static const char* shuffle_up_source = - "__kernel void test_sub_group_shuffle_up(const __global Type *in, __global " - "int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " Type x = in[gid];\n" - " out[gid] = sub_group_shuffle_up(x, xy[gid].z);" - "}\n"; - template int run_shuffle_relative_for_type(RunTestForType rft) { - int error = rft.run_impl>( - "test_sub_group_shuffle_up", shuffle_up_source); + int error = + rft.run_impl>("sub_group_shuffle_up"); error |= rft.run_impl>( - "test_sub_group_shuffle_down", shuffle_down_source); + "sub_group_shuffle_down"); return error; } @@ -62,9 +44,11 @@ int test_subgroup_functions_shuffle_relative(cl_device_id device, 
"device, skipping test.\n"); return TEST_SKIPPED_ITSELF; } + constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_generic_source); RunTestForType rft(device, context, queue, num_elements, test_params); int error = run_shuffle_relative_for_type(rft); -- cgit v1.2.3 From 7147d072c7bbed99e429cb8fe3e86139a12ef8bb Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Mon, 4 Oct 2021 15:42:44 +0200 Subject: Remove space character from extension name (#1336) --- test_common/gl/setup_x11.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/test_common/gl/setup_x11.cpp b/test_common/gl/setup_x11.cpp index c54ecdec..7efda3d2 100644 --- a/test_common/gl/setup_x11.cpp +++ b/test_common/gl/setup_x11.cpp @@ -90,10 +90,17 @@ public: } for (int i=0; i<(int)num_of_devices; i++) { - if (!is_extension_available(devices[i], "cl_khr_gl_sharing ")) { - log_info("Device %d of %d does not support required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); - } else { - log_info("Device %d of %d supports required extension cl_khr_gl_sharing.\n", i+1, num_of_devices); + if (!is_extension_available(devices[i], "cl_khr_gl_sharing")) + { + log_info("Device %d of %d does not support required extension " + "cl_khr_gl_sharing.\n", + i + 1, num_of_devices); + } + else + { + log_info("Device %d of %d supports required extension " + "cl_khr_gl_sharing.\n", + i + 1, num_of_devices); found_valid_device = 1; m_devices[m_device_count++] = devices[i]; } -- cgit v1.2.3 From 410f46f49fcec65d18d30b0df7a1d7ae0a4cd5db Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Wed, 3 Nov 2021 16:36:36 +0000 Subject: Add testing of sub_group_broadcast for (u)char and (u)short types (#1347) Signed-off-by: Stuart Brady --- test_conformance/subgroups/test_subgroup_extended_types.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/test_conformance/subgroups/test_subgroup_extended_types.cpp b/test_conformance/subgroups/test_subgroup_extended_types.cpp index dbe24623..c9e6bb61 100644 --- a/test_conformance/subgroups/test_subgroup_extended_types.cpp +++ b/test_conformance/subgroups/test_subgroup_extended_types.cpp @@ -108,22 +108,26 @@ int test_subgroup_functions_extended_types(cl_device_id device, error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); + error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); error |= run_broadcast_for_extended_type(rft); -- cgit v1.2.3 From e9cd9a446e1b36a02f6e8f959256d5f96eda21a4 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Mon, 8 Nov 2021 11:00:50 +0000 Subject: Remove excessive logging in subgroup tests (#1343) This also adds some missing data type logging to the subgroup_functions_non_uniform_vote tests. 
Signed-off-by: Stuart Brady --- .../subgroups/subgroup_common_templates.h | 37 -------------- .../subgroups/test_subgroup_ballot.cpp | 14 ------ .../subgroups/test_subgroup_non_uniform_vote.cpp | 58 ++++++++-------------- 3 files changed, 21 insertions(+), 88 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 5c5f9560..349f8100 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -80,7 +80,6 @@ template struct BC TypeManager::name()); if (non_uniform_size) { - log_info(" non uniform work group size mode ON\n"); ng++; } for (k = 0; k < ng; ++k) @@ -581,14 +580,6 @@ template struct SCEX_NU : func_name = "sub_group_scan_exclusive"; log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); - log_info(" test params: global size = %d local size = %d subgroups " - "size = %d \n", - test_params.global_workgroup_size, nw, ns); - if (test_params.work_items_mask.any()) - { - log_info(" work items mask: %s\n", - test_params.work_items_mask.to_string().c_str()); - } genrand(x, t, m, ns, nw, ng); } @@ -637,16 +628,10 @@ template struct SCEX_NU } if (active_work_items.empty()) { - log_info(" No acitve workitems in workgroup id = %d " - "subgroup id = %d - no calculation\n", - k, j); continue; } else if (active_work_items.size() == 1) { - log_info(" One active workitem in workgroup id = %d " - "subgroup id = %d - no calculation\n", - k, j); continue; } else @@ -702,14 +687,6 @@ template struct SCIN_NU genrand(x, t, m, ns, nw, ng); log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); - log_info(" test params: global size = %d local size = %d subgroups " - "size = %d \n", - test_params.global_workgroup_size, nw, ns); - if (test_params.work_items_mask.any()) - { - log_info(" work items mask: %s\n", - 
test_params.work_items_mask.to_string().c_str()); - } } static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -765,9 +742,6 @@ template struct SCIN_NU } if (active_work_items.empty()) { - log_info(" No acitve workitems in workgroup id = %d " - "subgroup id = %d - no calculation\n", - k, j); continue; } else @@ -828,14 +802,6 @@ template struct RED_NU : func_name = "sub_group_reduce"; log_info(" %s_%s(%s)...\n", func_name.c_str(), operation_names(operation), TypeManager::name()); - log_info(" test params: global size = %d local size = %d subgroups " - "size = %d \n", - test_params.global_workgroup_size, nw, ns); - if (test_params.work_items_mask.any()) - { - log_info(" work items mask: %s\n", - test_params.work_items_mask.to_string().c_str()); - } genrand(x, t, m, ns, nw, ng); } @@ -894,9 +860,6 @@ template struct RED_NU if (active_work_items.empty()) { - log_info(" No acitve workitems in workgroup id = %d " - "subgroup id = %d - no calculation\n", - k, j); continue; } diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 2bd54e43..ac90bad7 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -31,10 +31,6 @@ template struct BALLOT int sbs = test_params.subgroup_size; int non_uniform_size = gws % lws; log_info(" sub_group_ballot...\n"); - if (non_uniform_size) - { - log_info(" non uniform work group size mode ON\n"); - } } static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -117,11 +113,6 @@ template struct BALLOT_BIT_EXTRACT log_info(" sub_group_%s(%s)...\n", operation_names(operation), TypeManager::name()); - if (non_uniform_size) - { - log_info(" non uniform work group size mode ON\n"); - } - for (wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group for (sb_id = 0; sb_id < sb_number; ++sb_id) @@ -275,10 +266,6 @@ template struct BALLOT_INVERSE int sbs = test_params.subgroup_size; int non_uniform_size 
= gws % lws; log_info(" sub_group_inverse_ballot...\n"); - if (non_uniform_size) - { - log_info(" non uniform work group size mode ON\n"); - } // no work here } @@ -379,7 +366,6 @@ template struct BALLOT_COUNT_SCAN_FIND TypeManager::name()); if (non_uniform_size) { - log_info(" non uniform work group size mode ON\n"); wg_number++; } int e; diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index f956960b..835de25d 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -34,20 +34,10 @@ template struct VOTE int last_subgroup_size = 0; ii = 0; - log_info(" sub_group_%s%s... \n", + log_info(" sub_group_%s%s(%s)... \n", (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_", - operation_names(operation)); + operation_names(operation), TypeManager::name()); - log_info(" test params: global size = %d local size = %d subgroups " - "size = %d data type (%s)\n", - test_params.global_workgroup_size, nw, ns, - TypeManager::name()); - log_info(" work items mask: %s\n", - test_params.work_items_mask.to_string().c_str()); - if (non_uniform_size) - { - log_info(" non uniform work group size mode ON\n"); - } if (operation == NonUniformVoteOp::elect) return; for (k = 0; k < ng; ++k) @@ -171,34 +161,28 @@ template struct VOTE } if (active_work_items.empty()) { - log_info(" no one workitem acitve... in workgroup id = %d " - "subgroup id = %d\n", - k, j); + continue; } - else + auto lowest_active = active_work_items.begin(); + for (const int &active_work_item : active_work_items) { - auto lowest_active = active_work_items.begin(); - for (const int &active_work_item : active_work_items) + i = active_work_item; + if (operation == NonUniformVoteOp::elect) { - i = active_work_item; - if (operation == NonUniformVoteOp::elect) - { - i == *lowest_active ? tr = 1 : tr = 0; - } + i == *lowest_active ? 
tr = 1 : tr = 0; + } - // normalize device values on host, non zero set 1. - rr = compare_ordered(my[ii + i], 0) ? 0 : 1; + // normalize device values on host, non zero set 1. + rr = compare_ordered(my[ii + i], 0) ? 0 : 1; - if (rr != tr) - { - log_error("ERROR: sub_group_%s() \n", - operation_names(operation)); - log_error( - "mismatch for work item %d sub group %d in " - "work group %d. Expected: %d Obtained: %d\n", - i, j, k, tr, rr); - return TEST_FAIL; - } + if (rr != tr) + { + log_error("ERROR: sub_group_%s() \n", + operation_names(operation)); + log_error("mismatch for work item %d sub group %d in " + "work group %d. Expected: %d Obtained: %d\n", + i, j, k, tr, rr); + return TEST_FAIL; } } } @@ -208,9 +192,9 @@ template struct VOTE m += 4 * nw; } - log_info(" sub_group_%s%s... passed\n", + log_info(" sub_group_%s%s(%s)... passed\n", (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_", - operation_names(operation)); + operation_names(operation), TypeManager::name()); return TEST_PASS; } }; -- cgit v1.2.3 From 1116a71ba2994ecf761d2ab853de7de51448500d Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 16 Nov 2021 11:27:04 +0000 Subject: Improve error handling in subgroup tests (#1352) * MPGCOMP-14761 Improve error handling in subgroup tests Signed-off-by: Stuart Brady * Add missing newline --- test_common/harness/errorHelpers.h | 1 + .../subgroups/subgroup_common_templates.h | 20 ++++---- test_conformance/subgroups/subhelpers.h | 58 ++++++++++++++-------- test_conformance/subgroups/test_barrier.cpp | 10 ++-- test_conformance/subgroups/test_ifp.cpp | 12 ++--- test_conformance/subgroups/test_subgroup.cpp | 4 +- .../subgroups/test_subgroup_ballot.cpp | 20 ++++---- .../subgroups/test_subgroup_clustered_reduce.cpp | 4 +- .../subgroups/test_subgroup_non_uniform_vote.cpp | 4 +- 9 files changed, 74 insertions(+), 59 deletions(-) diff --git a/test_common/harness/errorHelpers.h b/test_common/harness/errorHelpers.h index d59bc78d..80eb3b58 100644 --- 
a/test_common/harness/errorHelpers.h +++ b/test_common/harness/errorHelpers.h @@ -62,6 +62,7 @@ static int vlog_win32(const char *format, ...); return TEST_FAIL; \ } #define test_error(errCode, msg) test_error_ret(errCode, msg, errCode) +#define test_error_fail(errCode, msg) test_error_ret(errCode, msg, TEST_FAIL) #define test_error_ret(errCode, msg, retValue) \ { \ auto errCodeResult = errCode; \ diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 349f8100..cfe02c2f 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -168,8 +168,8 @@ template struct BC } } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, l, n; int ng = test_params.global_workgroup_size; @@ -499,8 +499,8 @@ template struct SHF } } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, l, n; int nw = test_params.local_workgroup_size; @@ -583,8 +583,8 @@ template struct SCEX_NU genrand(x, t, m, ns, nw, ng); } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, n; int nw = test_params.local_workgroup_size; @@ -689,8 +689,8 @@ template struct SCIN_NU operation_names(operation), TypeManager::name()); } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, n; int nw = 
test_params.local_workgroup_size; @@ -805,8 +805,8 @@ template struct RED_NU genrand(x, t, m, ns, nw, ng); } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, n; int nw = test_params.local_workgroup_size; diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 6d32928a..bd4b6d61 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -1375,25 +1375,31 @@ static int run_kernel(cl_context context, cl_command_queue queue, // Driver for testing a single built in function template struct test { - static int mrun(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, const char *kname, - const char *src, WorkGroupParams test_params) + static test_status mrun(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + const char *kname, const char *src, + WorkGroupParams test_params) { - int error = TEST_PASS; + test_status combined_error = TEST_SKIPPED_ITSELF; for (auto &mask : test_params.all_work_item_masks) { test_params.work_items_mask = mask; - error |= run(device, context, queue, num_elements, kname, src, - test_params); + test_status error = run(device, context, queue, num_elements, kname, + src, test_params); + + if (error == TEST_FAIL + || (error == TEST_PASS && combined_error != TEST_FAIL)) + combined_error = error; } - return error; + return combined_error; }; - static int run(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, const char *kname, - const char *src, WorkGroupParams test_params) + static test_status run(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + const char *kname, const char *src, + WorkGroupParams test_params) { size_t tmp; - int error; + cl_int error; int 
subgroup_size, num_subgroups; size_t realSize; size_t global = test_params.global_workgroup_size; @@ -1434,7 +1440,7 @@ template struct test if (!TypeManager::type_supported(device)) { log_info("Data type not supported : %s\n", TypeManager::name()); - return 0; + return TEST_SKIPPED_ITSELF; } else { @@ -1450,7 +1456,7 @@ template struct test error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL); - test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); + test_error_fail(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM"); if (test_params.use_core_subgroups) { kernel_sstr @@ -1465,12 +1471,12 @@ template struct test error = create_single_kernel_helper(context, &program, &kernel, 1, &kernel_src, kname); - if (error != 0) return error; + if (error != CL_SUCCESS) return TEST_FAIL; // Determine some local dimensions to use for the test. error = get_max_common_work_group_size( context, kernel, test_params.global_workgroup_size, &local); - test_error(error, "get_max_common_work_group_size failed"); + test_error_fail(error, "get_max_common_work_group_size failed"); // Limit it a bit so we have muliple work groups // Ideally this will still be large enough to give us multiple @@ -1543,7 +1549,7 @@ template struct test input_array_size * sizeof(Ty), sgmap.data(), global * sizeof(cl_int4), odata.data(), output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); - test_error(error, "Running kernel first time failed"); + test_error_fail(error, "Running kernel first time failed"); // Generate the desired input for the kernel @@ -1553,13 +1559,18 @@ template struct test input_array_size * sizeof(Ty), sgmap.data(), global * sizeof(cl_int4), odata.data(), output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); - test_error(error, "Running kernel second time failed"); + test_error_fail(error, "Running kernel second time failed"); // Check the result - error = Fns::chk(idata.data(), odata.data(), mapin.data(), - mapout.data(), 
sgmap.data(), test_params); - test_error(error, "Data verification failed"); - return TEST_PASS; + test_status status = Fns::chk(idata.data(), odata.data(), mapin.data(), + mapout.data(), sgmap.data(), test_params); + // Detailed failure and skip messages should be logged by Fns::gen + // and Fns::chk. + if (status == TEST_FAIL) + { + test_fail("Data verification failed\n"); + } + return status; } }; @@ -1625,7 +1636,10 @@ struct RunTestForType test_params_); } - return error; + // If we return TEST_SKIPPED_ITSELF here, then an entire suite may be + // reported as having been skipped even if some tests within it + // passed, as the status codes are erroneously ORed together: + return error == TEST_FAIL ? TEST_FAIL : TEST_PASS; } private: diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp index 47e42f65..b570e922 100644 --- a/test_conformance/subgroups/test_barrier.cpp +++ b/test_conformance/subgroups/test_barrier.cpp @@ -92,8 +92,8 @@ template struct BAR } } - static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, + cl_int *m, const WorkGroupParams &test_params) { int ii, i, j, k, n; int nw = test_params.local_workgroup_size; @@ -133,7 +133,7 @@ template struct BAR "id %d in sub group %d in group %d expected " "%d got %d\n", i, j, k, tr, rr); - return -1; + return TEST_FAIL; } } } @@ -143,7 +143,7 @@ template struct BAR m += 2 * nw; } - return 0; + return TEST_PASS; } }; @@ -187,4 +187,4 @@ int test_barrier_functions_ext(cl_device_id device, cl_context context, } return test_barrier_functions(device, context, queue, num_elements, false); -} \ No newline at end of file +} diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp index fccaa8c7..f6c5227d 100644 --- a/test_conformance/subgroups/test_ifp.cpp +++ b/test_conformance/subgroups/test_ifp.cpp @@ 
-245,8 +245,8 @@ struct IFP } } - static int chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, - const WorkGroupParams &test_params) + static test_status chk(cl_int *x, cl_int *y, cl_int *t, cl_int *, cl_int *, + const WorkGroupParams &test_params) { int i, k; int nw = test_params.local_workgroup_size; @@ -255,8 +255,8 @@ struct IFP int nj = (nw + ns - 1) / ns; ng = ng / nw; - // We need at least 2 sub groups per group for this tes - if (nj == 1) return 0; + // We need at least 2 sub groups per group for this test + if (nj == 1) return TEST_SKIPPED_ITSELF; log_info(" independent forward progress...\n"); @@ -270,14 +270,14 @@ struct IFP log_error( "ERROR: mismatch at element %d in work group %d\n", i, k); - return -1; + return TEST_FAIL; } } x += nj * (NUM_LOC + 1); y += NUM_LOC; } - return 0; + return TEST_PASS; } }; diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp index 63bfc453..eefca5f8 100644 --- a/test_conformance/subgroups/test_subgroup.cpp +++ b/test_conformance/subgroups/test_subgroup.cpp @@ -68,8 +68,8 @@ template struct AA } } - static int chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(cl_int *x, cl_int *y, cl_int *mx, cl_int *my, + cl_int *m, const WorkGroupParams &test_params) { int ii, i, j, k, n; int ng = test_params.global_workgroup_size; diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index ac90bad7..0228e82c 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -33,8 +33,8 @@ template struct BALLOT log_info(" sub_group_ballot...\n"); } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int wi_id, wg_id, sb_id; int gws = 
test_params.global_workgroup_size; @@ -146,8 +146,8 @@ template struct BALLOT_BIT_EXTRACT } } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int wi_id, wg_id, l, sb_id; int gws = test_params.global_workgroup_size; @@ -269,8 +269,8 @@ template struct BALLOT_INVERSE // no work here } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; @@ -444,8 +444,8 @@ template struct BALLOT_COUNT_SCAN_FIND return mask; } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; @@ -617,8 +617,8 @@ template struct SMASK } } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index 11fcebc4..ad9e1ff2 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -50,8 +50,8 @@ template struct RED_CLU genrand(x, t, m, ns, nw, ng); } - static int chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, + const WorkGroupParams &test_params) { int nw = test_params.local_workgroup_size; 
int ns = test_params.subgroup_size; diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index 835de25d..b21a9f7e 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -83,8 +83,8 @@ template struct VOTE } } - static int chk(T *x, T *y, T *mx, T *my, cl_int *m, - const WorkGroupParams &test_params) + static test_status chk(T *x, T *y, T *mx, T *my, cl_int *m, + const WorkGroupParams &test_params) { int ii, i, j, k, n; int nw = test_params.local_workgroup_size; -- cgit v1.2.3 From 1c6dbc23e74afeb5dcfdf2de2d69734c6b02a845 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 16 Nov 2021 14:03:06 +0000 Subject: Clean up logging in cl_khr_subgroup_ballot tests (#1351) The tests were logging scalar results as vectors padded with zeroes for no apparent benefit. Fix this. Signed-off-by: Stuart Brady --- .../subgroups/test_subgroup_ballot.cpp | 47 +++++++++------------- 1 file changed, 19 insertions(+), 28 deletions(-) diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 0228e82c..ee2c5e51 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -81,8 +81,8 @@ template struct BALLOT { log_error( "ERROR: sub_group_ballot mismatch for local id " - "%d in sub group %d in group %d obtained {%d}, " - "expected {%d} \n", + "%d in sub group %d in group %d obtained %d, " + "expected %d\n", wi_id, sb_id, wg_id, device_result, expected_result); return TEST_FAIL; @@ -455,7 +455,7 @@ template struct BALLOT_COUNT_SCAN_FIND int non_uniform_size = gws % lws; int wg_number = gws / lws; wg_number = non_uniform_size ? 
wg_number + 1 : wg_number; - cl_uint4 expected_result, device_result; + cl_uint expected_result, device_result; int last_subgroup_size = 0; int current_sbs = 0; @@ -487,7 +487,7 @@ template struct BALLOT_COUNT_SCAN_FIND current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; } // Check result - expected_result = { 0, 0, 0, 0 }; + expected_result = 0; for (wi_id = 0; wi_id < current_sbs; ++wi_id) { // for subgroup element bs128 bs; @@ -497,23 +497,20 @@ template struct BALLOT_COUNT_SCAN_FIND | (bs128(mx[wg_offset + wi_id].s2) << 64) | (bs128(mx[wg_offset + wi_id].s3) << 96); bs &= getImportantBits(wi_id, current_sbs); - device_result = my[wg_offset + wi_id]; + device_result = my[wg_offset + wi_id].s0; if (operation == BallotOp::ballot_inclusive_scan || operation == BallotOp::ballot_exclusive_scan || operation == BallotOp::ballot_bit_count) { - expected_result.s0 = bs.count(); + expected_result = bs.count(); if (!compare(device_result, expected_result)) { log_error("ERROR: sub_group_%s " "mismatch for local id %d in sub group " - "%d in group %d obtained {%d, %d, %d, " - "%d}, expected {%d, %d, %d, %d}\n", + "%d in group %d obtained %d, " + "expected %d\n", operation_names(operation), wi_id, sb_id, - wg_id, device_result.s0, device_result.s1, - device_result.s2, device_result.s3, - expected_result.s0, expected_result.s1, - expected_result.s2, expected_result.s3); + wg_id, device_result, expected_result); return TEST_FAIL; } } @@ -523,7 +520,7 @@ template struct BALLOT_COUNT_SCAN_FIND { if (bs.test(id)) { - expected_result.s0 = id; + expected_result = id; break; } } @@ -531,13 +528,10 @@ template struct BALLOT_COUNT_SCAN_FIND { log_error("ERROR: sub_group_ballot_find_lsb " "mismatch for local id %d in sub group " - "%d in group %d obtained {%d, %d, %d, " - "%d}, expected {%d, %d, %d, %d}\n", - wi_id, sb_id, wg_id, device_result.s0, - device_result.s1, device_result.s2, - device_result.s3, expected_result.s0, - expected_result.s1, expected_result.s2, - 
expected_result.s3); + "%d in group %d obtained %d, " + "expected %d\n", + wi_id, sb_id, wg_id, device_result, + expected_result); return TEST_FAIL; } } @@ -547,7 +541,7 @@ template struct BALLOT_COUNT_SCAN_FIND { if (bs.test(id)) { - expected_result.s0 = id; + expected_result = id; break; } } @@ -555,13 +549,10 @@ template struct BALLOT_COUNT_SCAN_FIND { log_error("ERROR: sub_group_ballot_find_msb " "mismatch for local id %d in sub group " - "%d in group %d obtained {%d, %d, %d, " - "%d}, expected {%d, %d, %d, %d}\n", - wi_id, sb_id, wg_id, device_result.s0, - device_result.s1, device_result.s2, - device_result.s3, expected_result.s0, - expected_result.s1, expected_result.s2, - expected_result.s3); + "%d in group %d obtained %d, " + "expected %d\n", + wi_id, sb_id, wg_id, device_result, + expected_result); return TEST_FAIL; } } -- cgit v1.2.3 From 3cd906aa9b8b96ae0651269c47d6b8cc475c62f5 Mon Sep 17 00:00:00 2001 From: marcat03 <94451804+marcat03@users.noreply.github.com> Date: Tue, 16 Nov 2021 16:07:43 +0000 Subject: Fix missing cl_khr_semaphore extensions in compiler tests (#1357) * Added missing extensions related to cl_khr_semaphore Signed-off-by: Marco Cattani --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index de30e06b..2f29d39b 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -75,6 +75,9 @@ const char *known_extensions[] = { "cl_khr_pci_bus_info", "cl_khr_suggested_local_work_size", "cl_khr_spirv_linkonce_odr", + "cl_khr_semaphore", + "cl_khr_external_semaphore", + "cl_khr_external_semaphore_sync_fd", }; size_t num_known_extensions = sizeof(known_extensions) / sizeof(char *); -- cgit v1.2.3 From c25709f3964f1675a03c1a4f1315a09a4386c0bc Mon Sep 
17 00:00:00 2001 From: James Price Date: Tue, 23 Nov 2021 14:04:02 -0500 Subject: Fix stack-use-after-scope crash in conversions (#1358) The way that program sources were being constructed involved capturing pointers to strings that were allocated on the stack, and then trying to use them outside of that scope. This change uses a stringstream defined in the outer scope to build the program instead. --- test_conformance/conversions/test_conversions.cpp | 116 ++++++++++------------ 1 file changed, 54 insertions(+), 62 deletions(-) diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index e8e572e6..d489e28a 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -38,6 +38,7 @@ #include #endif +#include #include #include #include @@ -1559,84 +1560,40 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, cl_program program; char testName[256]; int error = 0; - const char **strings; - size_t stringCount = 0; + + std::ostringstream source; + if (outType == kdouble || inType == kdouble) + source << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; // Create the program. This is a bit complicated because we are trying to avoid byte and short stores. if (0 == vectorSize) { + // Create the type names. 
char inName[32]; char outName[32]; - const char *programSource[] = - { - "", // optional pragma - "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " dest[i] = src[i];\n" - "}\n" - }; - stringCount = sizeof(programSource) / sizeof(programSource[0]); - strings = programSource; - - if (outType == kdouble || inType == kdouble) - programSource[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - - //create the type name strncpy(inName, gTypeNames[inType], sizeof(inName)); strncpy(outName, gTypeNames[outType], sizeof(outName)); sprintf(testName, "test_implicit_%s_%s", outName, inName); - vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], gTypeNames[outType]); + + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = src[i];\n"; + source << "}\n"; + + vlog("Building implicit %s -> %s conversion test\n", gTypeNames[inType], + gTypeNames[outType]); fflush(stdout); } else { int vectorSizetmp = vectorSizes[vectorSize]; + // Create the type names. 
char convertString[128]; char inName[32]; char outName[32]; - const char *programSource[] = - { - "", // optional pragma - "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " dest[i] = ", convertString, "( src[i] );\n" - "}\n" - }; - const char *programSourceV3[] = - { - "", // optional pragma - "__kernel void ", testName, "( __global ", inName, " *src, __global ", outName, " *dest )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0))\n" - " vstore3( ", convertString, "( vload3( i, src)), i, dest );\n" - " else\n" - " {\n" - " ", inName, "3 in;\n" - " ", outName, "3 out;\n" - " if( 0 == (i & 1) )\n" - " in.y = src[3*i+1];\n" - " in.x = src[3*i];\n" - " out = ", convertString, "( in ); \n" - " dest[3*i] = out.x;\n" - " if( 0 == (i & 1) )\n" - " dest[3*i+1] = out.y;\n" - " }\n" - "}\n" - }; - stringCount = 3 == vectorSizetmp ? sizeof(programSourceV3) / sizeof(programSourceV3[0]) : - sizeof(programSource) / sizeof(programSource[0]); - strings = 3 == vectorSizetmp ? 
programSourceV3 : programSource; - - if (outType == kdouble || inType == kdouble) { - programSource[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - programSourceV3[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; - } - - //create the type name switch (vectorSizetmp) { case 1: @@ -1661,8 +1618,40 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, vlog("Building %s( %s ) test\n", convertString, inName); break; } - fflush(stdout); + + if (vectorSizetmp == 3) + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " if( i + 1 < get_global_size(0))\n"; + source << " vstore3( " << convertString + << "( vload3( i, src)), i, dest );\n"; + source << " else\n"; + source << " {\n"; + source << " " << inName << "3 in;\n"; + source << " " << outName << "3 out;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " in.y = src[3*i+1];\n"; + source << " in.x = src[3*i];\n"; + source << " out = " << convertString << "( in ); \n"; + source << " dest[3*i] = out.x;\n"; + source << " if( 0 == (i & 1) )\n"; + source << " dest[3*i+1] = out.y;\n"; + source << " }\n"; + source << "}\n"; + } + else + { + source << "__kernel void " << testName << "( __global " << inName + << " *src, __global " << outName << " *dest )\n"; + source << "{\n"; + source << " size_t i = get_global_id(0);\n"; + source << " dest[i] = " << convertString << "( src[i] );\n"; + source << "}\n"; + } } *outKernel = NULL; @@ -1671,7 +1660,10 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, flags = "-cl-denorms-are-zero"; // build it - error = create_single_kernel_helper(gContext, &program, outKernel, (cl_uint)stringCount, strings, testName, flags); + std::string sourceString = source.str(); + const char *programSource = sourceString.c_str(); + error = create_single_kernel_helper(gContext, &program, 
outKernel, 1, + &programSource, testName, flags); if (error) { char buffer[2048] = ""; -- cgit v1.2.3 From 3eb0f50d85df0350af29f5f1dbbf5a3ddef906b3 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Thu, 25 Nov 2021 13:36:20 +0000 Subject: Use maximum subgroup size in sub_group_ballot tests (#1344) sub_group_ballot_bit_count() and sub_group_ballot_find_msb() mask their input according to a subgroup size, which is assumed to be the maximum subgroup size, and not the actual subgroup size excluding non-existent work-items in the "remainder" subgroup. Fix this as per the the clarification made to the OpenCL C specification in revision 3.0.9 for issue KhronosGroup/OpenCL-Docs#626 by pull request KhronosGroup/OpenCL-Docs#689. Signed-off-by: Stuart Brady --- test_conformance/subgroups/test_subgroup_ballot.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index ee2c5e51..f362a501 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -496,7 +496,7 @@ template struct BALLOT_COUNT_SCAN_FIND | (bs128(mx[wg_offset + wi_id].s1) << 32) | (bs128(mx[wg_offset + wi_id].s2) << 64) | (bs128(mx[wg_offset + wi_id].s3) << 96); - bs &= getImportantBits(wi_id, current_sbs); + bs &= getImportantBits(wi_id, sbs); device_result = my[wg_offset + wi_id].s0; if (operation == BallotOp::ballot_inclusive_scan || operation == BallotOp::ballot_exclusive_scan @@ -516,7 +516,7 @@ template struct BALLOT_COUNT_SCAN_FIND } else if (operation == BallotOp::ballot_find_lsb) { - for (int id = 0; id < current_sbs; ++id) + for (int id = 0; id < sbs; ++id) { if (bs.test(id)) { @@ -537,7 +537,7 @@ template struct BALLOT_COUNT_SCAN_FIND } else if (operation == BallotOp::ballot_find_msb) { - for (int id = current_sbs - 1; id >= 0; --id) + for (int id = sbs - 1; id >= 0; --id) { if (bs.test(id)) { -- cgit v1.2.3 From 
6dff4fdffadff59c42083bd2f685598613c30091 Mon Sep 17 00:00:00 2001 From: BKoscielak Date: Thu, 25 Nov 2021 14:40:19 +0100 Subject: Fix conversion data loss in test_api min_max_constant_args (#1355) --- test_conformance/api/test_api_min_max.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index 9e981cd3..8d132fe6 100644 --- a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -1489,7 +1489,7 @@ int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_com error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); test_error( error, "Unable to get max constant buffer size" ); - individualBufferSize = ((int)maxSize/2)/maxArgs; + individualBufferSize = (maxSize / 2) / maxArgs; log_info("Reported max constant arg count of %d and max constant buffer size of %d. Test will attempt to allocate half of that, or %d buffers of size %d.\n", (int)maxArgs, (int)maxSize, (int)maxArgs, (int)individualBufferSize); -- cgit v1.2.3 From 6f50623ba867ee5a847464e15937b1a9bda3506c Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Thu, 25 Nov 2021 14:41:06 +0100 Subject: Subgroups tests - sub_group_non_uniform_scan_exclusive function fixes (#1350) * Fix - comparing results will never happen. 
* No special action needed for one work item in the subgroup --- test_conformance/subgroups/subgroup_common_templates.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index cfe02c2f..64b4b971 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -630,19 +630,12 @@ template struct SCEX_NU { continue; } - else if (active_work_items.size() == 1) - { - continue; - } else { tr = TypeManager::identify_limits(operation); - int idx = 0; for (const int &active_work_item : active_work_items) { rr = my[ii + active_work_item]; - if (idx == 0) continue; - if (!compare_ordered(rr, tr)) { log_error( @@ -655,7 +648,6 @@ template struct SCEX_NU } tr = calculate(tr, mx[ii + active_work_item], operation); - idx++; } } } -- cgit v1.2.3 From 7625011b666c1a7c1fee5818309e9ed3d658a899 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Fri, 26 Nov 2021 15:30:23 +0000 Subject: Remove unused inclusion of (#1362) Signed-off-by: Stuart Brady --- test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp index bb257bcd..5ab45222 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp @@ -17,7 +17,6 @@ #include "subhelpers.h" #include "harness/typeWrappers.h" #include "subgroup_common_templates.h" -#include namespace { -- cgit v1.2.3 From f8ec235d3c1555fbfaa7eea6bf5f3b588de1b03f Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Thu, 2 Dec 2021 15:27:30 +0000 Subject: Tidy up code to determine bit mask for ballot scans (#1363) It seems more intuitive to set only the bits that are required, rather than to set one 
more bit than is required, only to clear it again. Signed-off-by: Stuart Brady --- test_conformance/subgroups/test_subgroup_ballot.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index f362a501..e742aa3b 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -437,9 +437,9 @@ template struct BALLOT_COUNT_SCAN_FIND else if (operation == BallotOp::ballot_inclusive_scan || operation == BallotOp::ballot_exclusive_scan) { - for (cl_uint i = 0; i <= sub_group_local_id; ++i) mask.set(i); - if (operation == BallotOp::ballot_exclusive_scan) - mask.reset(sub_group_local_id); + for (cl_uint i = 0; i < sub_group_local_id; ++i) mask.set(i); + if (operation == BallotOp::ballot_inclusive_scan) + mask.set(sub_group_local_id); } return mask; } -- cgit v1.2.3 From e106be14f9d21a13d485c8256da6cccb933850cd Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Sat, 4 Dec 2021 18:55:17 +0100 Subject: Test api min max - fix printing cl_ulong data type (#1212) * test api - fix code formatting only * Fix printing cl_ulong type to avoid overloading. * Fix printing size_t data type * Fix printing size_t data type - set unsinged * Fix formatting for maxArgs (uint) and numberOfInts (size_t) --- test_conformance/api/test_api_min_max.cpp | 1746 ++++++++++++++++++----------- 1 file changed, 1087 insertions(+), 659 deletions(-) diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index 8d132fe6..28ca8237 100644 --- a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -24,7 +24,8 @@ const char *sample_single_param_kernel[] = { "{\n" " int tid = get_global_id(0);\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_single_param_write_kernel[] = { "__kernel void sample_test(__global int *src)\n" @@ -32,23 +33,29 @@ const char *sample_single_param_write_kernel[] = { " int tid = get_global_id(0);\n" " src[tid] = tid;\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_read_image_kernel_pattern[] = { - "__kernel void sample_test( __global float *result, ", " )\n" + "__kernel void sample_test( __global float *result, ", + " )\n" "{\n" - " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n" + " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | " + "CLK_FILTER_NEAREST;\n" " int tid = get_global_id(0);\n" " result[0] = 0.0f;\n", "\n" - "}\n" }; + "}\n" +}; const char *sample_write_image_kernel_pattern[] = { - "__kernel void sample_test( ", " )\n" + "__kernel void sample_test( ", + " )\n" "{\n" " int tid = get_global_id(0);\n", "\n" - "}\n" }; + "}\n" +}; const char *sample_large_parmam_kernel_pattern[] = { @@ -57,7 +64,8 @@ const char *sample_large_parmam_kernel_pattern[] = { "result[0] = 0;\n" "%s" "\n" - "}\n" }; + "}\n" +}; const char *sample_large_int_parmam_kernel_pattern[] = { "__kernel void sample_test(%s, __global int *result)\n" @@ -65,15 +73,19 @@ const char *sample_large_int_parmam_kernel_pattern[] = { "result[0] = 0;\n" "%s" "\n" - "}\n" }; + "}\n" +}; const char *sample_sampler_kernel_pattern[] = { - "__kernel void sample_test( read_only image2d_t src, __global int4 *dst", ", sampler_t sampler%d", ")\n" + "__kernel void sample_test( read_only image2d_t src, __global int4 *dst", + ", sampler_t sampler%d", + ")\n" "{\n" " int tid = get_global_id(0);\n", " dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n", "\n" - "}\n" }; + "}\n" +}; const char *sample_const_arg_kernel[] = { "__kernel void sample_test(__constant int 
*src1, __global int *dst)\n" @@ -82,10 +94,12 @@ const char *sample_const_arg_kernel[] = { "\n" " dst[tid] = src1[tid];\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_local_arg_kernel[] = { - "__kernel void sample_test(__local int *src1, __global int *global_src, __global int *dst)\n" + "__kernel void sample_test(__local int *src1, __global int *global_src, " + "__global int *dst)\n" "{\n" " int tid = get_global_id(0);\n" "\n" @@ -93,19 +107,21 @@ const char *sample_local_arg_kernel[] = { " barrier(CLK_GLOBAL_MEM_FENCE);\n" " dst[tid] = src1[tid];\n" "\n" - "}\n" }; + "}\n" +}; const char *sample_const_max_arg_kernel_pattern = -"__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" dst[tid] = src1[tid];\n" -"%s" -"\n" -"}\n"; - -int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) + "__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n" + "{\n" + " int tid = get_global_id(0);\n" + "\n" + " dst[tid] = src1[tid];\n" + "%s" + "\n" + "}\n"; + +int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error, retVal; unsigned int maxThreadDim, threadDim, i; @@ -118,19 +134,24 @@ int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl /* Get the max thread dimensions */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxThreadDim ), &maxThreadDim, NULL ); - test_error( error, "Unable to get max work item dimensions from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(maxThreadDim), &maxThreadDim, NULL); + test_error(error, "Unable to get max work item dimensions from device"); - if( maxThreadDim < 3 ) + if (maxThreadDim < 3) { - log_error( "ERROR: Reported max work item dimensions is less than required! 
(%d)\n", maxThreadDim ); + log_error("ERROR: Reported max work item dimensions is less than " + "required! (%d)\n", + maxThreadDim); return -1; } log_info("Reported max thread dimensions of %d.\n", maxThreadDim); /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_param_kernel, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + sample_single_param_kernel, "sample_test") + != 0) { return -1; } @@ -138,105 +159,122 @@ int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl /* Create some I/O streams */ streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 100, NULL, &error); - if( streams[0] == NULL ) + if (streams[0] == NULL) { log_error("ERROR: Creating test array failed!\n"); return -1; } /* Set the arguments */ - error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set kernel arguments"); retVal = 0; /* Now try running the kernel with up to that many threads */ - for (threadDim=1; threadDim <= maxThreadDim; threadDim++) + for (threadDim = 1; threadDim <= maxThreadDim; threadDim++) { - threads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim ); - localThreads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim ); - for( i = 0; i < maxThreadDim; i++ ) + threads = (size_t *)malloc(sizeof(size_t) * maxThreadDim); + localThreads = (size_t *)malloc(sizeof(size_t) * maxThreadDim); + for (i = 0; i < maxThreadDim; i++) { - threads[ i ] = 1; + threads[i] = 1; localThreads[i] = 1; } - error = clEnqueueNDRangeKernel( queue, kernel, maxThreadDim, NULL, threads, localThreads, 0, NULL, &event ); - test_error( error, "Failed clEnqueueNDRangeKernel"); + error = clEnqueueNDRangeKernel(queue, kernel, maxThreadDim, NULL, + threads, localThreads, 0, NULL, &event); 
+ test_error(error, "Failed clEnqueueNDRangeKernel"); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error( + error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); /* All done */ - free( threads ); - free( localThreads ); + free(threads); + free(localThreads); } return retVal; } -int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t *deviceMaxWorkItemSize; unsigned int maxWorkItemDim; /* Get the max work item dimensions */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxWorkItemDim ), &maxWorkItemDim, NULL ); - test_error( error, "Unable to get max work item dimensions from device" ); - - log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n", maxWorkItemDim); - deviceMaxWorkItemSize = (size_t*)malloc(sizeof(size_t)*maxWorkItemDim); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxWorkItemDim, deviceMaxWorkItemSize, NULL ); - test_error( error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(maxWorkItemDim), &maxWorkItemDim, NULL); + test_error(error, "Unable to get max work item dimensions from device"); + + 
log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n", + maxWorkItemDim); + deviceMaxWorkItemSize = (size_t *)malloc(sizeof(size_t) * maxWorkItemDim); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(size_t) * maxWorkItemDim, + deviceMaxWorkItemSize, NULL); + test_error(error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed"); unsigned int i; int errors = 0; - for(i=0; i= 128 && maxParameterSize == 1024) { - error = clGetDeviceInfo( deviceID, CL_DEVICE_TYPE, sizeof( deviceType ), &deviceType, NULL ); - test_error( error, "Unable to get device type from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_TYPE, sizeof(deviceType), + &deviceType, NULL); + test_error(error, "Unable to get device type from device"); - if(deviceType != CL_DEVICE_TYPE_CUSTOM) + if (deviceType != CL_DEVICE_TYPE_CUSTOM) { maxReadImages = 127; } @@ -295,85 +340,107 @@ int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_c maxParameterSize -= deviceAddressSize; // Calculate the number we can use - if (maxParameterSize/deviceAddressSize < maxReadImages) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/deviceAddressSize)); - maxReadImages = (unsigned int)(maxParameterSize/deviceAddressSize); + if (maxParameterSize / deviceAddressSize < maxReadImages) + { + log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max image arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / deviceAddressSize)); + maxReadImages = (unsigned int)(maxParameterSize / deviceAddressSize); } /* Create a program with that many read args */ - programSrc = (char *)malloc( strlen( sample_read_image_kernel_pattern[ 0 ] ) + ( strlen( readArgPattern ) + 6 ) * ( maxReadImages ) + - strlen( sample_read_image_kernel_pattern[ 1 ] ) + 1 + 40240); + programSrc = (char *)malloc(strlen(sample_read_image_kernel_pattern[0]) + + 
(strlen(readArgPattern) + 6) * (maxReadImages) + + strlen(sample_read_image_kernel_pattern[1]) + + 1 + 40240); - strcpy( programSrc, sample_read_image_kernel_pattern[ 0 ] ); - strcat( programSrc, "read_only image2d_t srcimg0" ); - for( i = 0; i < maxReadImages-1; i++ ) + strcpy(programSrc, sample_read_image_kernel_pattern[0]); + strcat(programSrc, "read_only image2d_t srcimg0"); + for (i = 0; i < maxReadImages - 1; i++) { - sprintf( readArgLine, readArgPattern, i+1 ); - strcat( programSrc, readArgLine ); + sprintf(readArgLine, readArgPattern, i + 1); + strcat(programSrc, readArgLine); } - strcat( programSrc, sample_read_image_kernel_pattern[ 1 ] ); - for ( i = 0; i < maxReadImages; i++) { - sprintf( readArgLine, "\tresult[0] += read_imagef( srcimg%d, sampler, (int2)(0,0)).x;\n", i); - strcat( programSrc, readArgLine ); + strcat(programSrc, sample_read_image_kernel_pattern[1]); + for (i = 0; i < maxReadImages; i++) + { + sprintf( + readArgLine, + "\tresult[0] += read_imagef( srcimg%d, sampler, (int2)(0,0)).x;\n", + i); + strcat(programSrc, readArgLine); } - strcat( programSrc, sample_read_image_kernel_pattern[ 2 ] ); + strcat(programSrc, sample_read_image_kernel_pattern[2]); - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); - test_error( error, "Failed to create the program and kernel."); - free( programSrc ); + error = + create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programSrc, "sample_test"); + test_error(error, "Failed to create the program and kernel."); + free(programSrc); result = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float), NULL, &error); - test_error( error, "clCreateBufer failed"); + test_error(error, "clCreateBufer failed"); /* Create some I/O streams */ streams = new clMemWrapper[maxReadImages + 1]; - for( i = 0; i < maxReadImages; i++ ) + for (i = 0; i < maxReadImages; i++) { - image_data[0]=i; - image_result+= image_data[0]; - streams[i] = 
create_image_2d( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &image_format_desc, 4, 4, 0, image_data, &error ); - test_error( error, "Unable to allocate test image" ); + image_data[0] = i; + image_result += image_data[0]; + streams[i] = + create_image_2d(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + &image_format_desc, 4, 4, 0, image_data, &error); + test_error(error, "Unable to allocate test image"); } - error = clSetKernelArg( kernel, 0, sizeof( result ), &result ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(result), &result); + test_error(error, "Unable to set kernel arguments"); /* Set the arguments */ - for( i = 1; i < maxReadImages+1; i++ ) + for (i = 1; i < maxReadImages + 1; i++) { - error = clSetKernelArg( kernel, i, sizeof( streams[i-1] ), &streams[i-1] ); - test_error( error, "Unable to set kernel arguments" ); + error = + clSetKernelArg(kernel, i, sizeof(streams[i - 1]), &streams[i - 1]); + test_error(error, "Unable to set kernel arguments"); } /* Now try running the kernel */ threads[0] = threads[1] = 1; - error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event ); - test_error( error, "clEnqueueNDRangeKernel failed"); + error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, + NULL, &event); + test_error(error, "clEnqueueNDRangeKernel failed"); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); 
clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); - error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float), &actual_image_result, 0, NULL, NULL); + error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float), + &actual_image_result, 0, NULL, NULL); test_error(error, "clEnqueueReadBuffer failed"); delete[] streams; - if (actual_image_result != image_result) { - log_error("Result failed to verify. Got %g, expected %g.\n", actual_image_result, image_result); + if (actual_image_result != image_result) + { + log_error("Result failed to verify. Got %g, expected %g.\n", + actual_image_result, image_result); return 1; } return 0; } -int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; unsigned int maxWriteImages, i; @@ -381,94 +448,117 @@ int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_ char writeArgLine[128], *programSrc; const char *writeArgPattern = ", write_only image2d_t dstimg%d"; clKernelWrapper kernel; - clMemWrapper *streams; + clMemWrapper *streams; size_t threads[2]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; size_t maxParameterSize; cl_event event; cl_int event_status; cl_uint minRequiredWriteImages = gIsEmbedded ? 
1 : 8; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) image_format_desc.image_channel_order = CL_RGBA; image_format_desc.image_channel_data_type = CL_UNORM_INT8; /* Get the max read image arg count */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof( maxWriteImages ), &maxWriteImages, NULL ); - test_error( error, "Unable to get max write image arg count from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, + sizeof(maxWriteImages), &maxWriteImages, NULL); + test_error(error, "Unable to get max write image arg count from device"); - if( maxWriteImages == 0 ) + if (maxWriteImages == 0) { - log_info( "WARNING: Device reports 0 for a max write image arg count (write image arguments unsupported). Skipping test (implicitly passes). This is only valid if the number of image formats is also 0.\n" ); + log_info( + "WARNING: Device reports 0 for a max write image arg count (write " + "image arguments unsupported). Skipping test (implicitly passes). " + "This is only valid if the number of image formats is also 0.\n"); return 0; } - if( maxWriteImages < minRequiredWriteImages ) + if (maxWriteImages < minRequiredWriteImages) { - log_error( "ERROR: Reported max write image arg count is less than required! (%d)\n", maxWriteImages ); + log_error("ERROR: Reported max write image arg count is less than " + "required! 
(%d)\n", + maxWriteImages); return -1; } log_info("Reported %d max write image args.\n", maxWriteImages); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxParameterSize), &maxParameterSize, NULL); + test_error(error, "Unable to get max parameter size from device"); // Calculate the number we can use - if (maxParameterSize/sizeof(cl_mem) < maxWriteImages) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem))); - maxWriteImages = (unsigned int)(maxParameterSize/sizeof(cl_mem)); + if (maxParameterSize / sizeof(cl_mem) < maxWriteImages) + { + log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max image arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / sizeof(cl_mem))); + maxWriteImages = (unsigned int)(maxParameterSize / sizeof(cl_mem)); } /* Create a program with that many write args + 1 */ - programSrc = (char *)malloc( strlen( sample_write_image_kernel_pattern[ 0 ] ) + ( strlen( writeArgPattern ) + 6 ) * ( maxWriteImages + 1 ) + - strlen( sample_write_image_kernel_pattern[ 1 ] ) + 1 + 40240 ); + programSrc = (char *)malloc( + strlen(sample_write_image_kernel_pattern[0]) + + (strlen(writeArgPattern) + 6) * (maxWriteImages + 1) + + strlen(sample_write_image_kernel_pattern[1]) + 1 + 40240); - strcpy( programSrc, sample_write_image_kernel_pattern[ 0 ] ); - strcat( programSrc, "write_only image2d_t dstimg0" ); - for( i = 1; i < maxWriteImages; i++ ) + strcpy(programSrc, sample_write_image_kernel_pattern[0]); + strcat(programSrc, "write_only image2d_t dstimg0"); + for (i = 1; i < maxWriteImages; i++) { - sprintf( writeArgLine, writeArgPattern, i ); - strcat( programSrc, writeArgLine ); + sprintf(writeArgLine, 
writeArgPattern, i); + strcat(programSrc, writeArgLine); } - strcat( programSrc, sample_write_image_kernel_pattern[ 1 ] ); - for ( i = 0; i < maxWriteImages; i++) { - sprintf( writeArgLine, "\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n", i); - strcat( programSrc, writeArgLine ); + strcat(programSrc, sample_write_image_kernel_pattern[1]); + for (i = 0; i < maxWriteImages; i++) + { + sprintf(writeArgLine, + "\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n", + i); + strcat(programSrc, writeArgLine); } - strcat( programSrc, sample_write_image_kernel_pattern[ 2 ] ); + strcat(programSrc, sample_write_image_kernel_pattern[2]); - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); - test_error( error, "Failed to create the program and kernel."); - free( programSrc ); + error = + create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programSrc, "sample_test"); + test_error(error, "Failed to create the program and kernel."); + free(programSrc); /* Create some I/O streams */ streams = new clMemWrapper[maxWriteImages + 1]; - for( i = 0; i < maxWriteImages; i++ ) + for (i = 0; i < maxWriteImages; i++) { - streams[i] = create_image_2d( context, CL_MEM_READ_WRITE, &image_format_desc, 16, 16, 0, NULL, &error ); - test_error( error, "Unable to allocate test image" ); + streams[i] = + create_image_2d(context, CL_MEM_READ_WRITE, &image_format_desc, 16, + 16, 0, NULL, &error); + test_error(error, "Unable to allocate test image"); } /* Set the arguments */ - for( i = 0; i < maxWriteImages; i++ ) + for (i = 0; i < maxWriteImages; i++) { - error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(error, "Unable to set kernel arguments"); } /* Now try running the kernel */ threads[0] = threads[1] = 16; - error = 
clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event ); - test_error( error, "clEnqueueNDRangeKernel failed."); + error = clEnqueueNDRangeKernel(queue, kernel, 2, NULL, threads, NULL, 0, + NULL, &event); + test_error(error, "clEnqueueNDRangeKernel failed."); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); @@ -478,7 +568,8 @@ int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_ return 0; } -int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_ulong maxAllocSize, memSize, minSizeToTry; @@ -492,61 +583,89 @@ int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_co requiredAllocSize = 128 * 1024 * 1024; /* Get the max mem alloc size */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get max mem alloc size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get max mem alloc size from device"); - error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, 
sizeof( memSize ), &memSize, NULL ); - test_error( error, "Unable to get global memory size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, + sizeof(memSize), &memSize, NULL); + test_error(error, "Unable to get global memory size from device"); - if (memSize > (cl_ulong)SIZE_MAX) { - memSize = (cl_ulong)SIZE_MAX; + if (memSize > (cl_ulong)SIZE_MAX) + { + memSize = (cl_ulong)SIZE_MAX; } - if( maxAllocSize < requiredAllocSize) + if (maxAllocSize < requiredAllocSize) { - log_error( "ERROR: Reported max allocation size is less than required %lldMB! (%llu or %lluMB, from a total mem size of %lldMB)\n", (requiredAllocSize / 1024) / 1024, maxAllocSize, (maxAllocSize / 1024)/1024, (memSize / 1024)/1024 ); + log_error("ERROR: Reported max allocation size is less than required " + "%lldMB! (%llu or %lluMB, from a total mem size of %lldMB)\n", + (requiredAllocSize / 1024) / 1024, maxAllocSize, + (maxAllocSize / 1024) / 1024, (memSize / 1024) / 1024); return -1; } - requiredAllocSize = ((memSize / 4) > (1024 * 1024 * 1024)) ? 1024 * 1024 * 1024 : memSize / 4; + requiredAllocSize = ((memSize / 4) > (1024 * 1024 * 1024)) + ? 1024 * 1024 * 1024 + : memSize / 4; if (gIsEmbedded) - requiredAllocSize = (requiredAllocSize < 1 * 1024 * 1024) ? 1 * 1024 * 1024 : requiredAllocSize; + requiredAllocSize = (requiredAllocSize < 1 * 1024 * 1024) + ? 1 * 1024 * 1024 + : requiredAllocSize; else - requiredAllocSize = (requiredAllocSize < 128 * 1024 * 1024) ? 128 * 1024 * 1024 : requiredAllocSize; + requiredAllocSize = (requiredAllocSize < 128 * 1024 * 1024) + ? 128 * 1024 * 1024 + : requiredAllocSize; - if( maxAllocSize < requiredAllocSize ) + if (maxAllocSize < requiredAllocSize) { - log_error( "ERROR: Reported max allocation size is less than required of total memory! 
(%llu or %lluMB, from a total mem size of %lluMB)\n", maxAllocSize, (maxAllocSize / 1024)/1024, (requiredAllocSize / 1024)/1024 ); + log_error( + "ERROR: Reported max allocation size is less than required of " + "total memory! (%llu or %lluMB, from a total mem size of %lluMB)\n", + maxAllocSize, (maxAllocSize / 1024) / 1024, + (requiredAllocSize / 1024) / 1024); return -1; } - log_info("Reported max allocation size of %lld bytes (%gMB) and global mem size of %lld bytes (%gMB).\n", - maxAllocSize, maxAllocSize/(1024.0*1024.0), requiredAllocSize, requiredAllocSize/(1024.0*1024.0)); + log_info("Reported max allocation size of %lld bytes (%gMB) and global mem " + "size of %lld bytes (%gMB).\n", + maxAllocSize, maxAllocSize / (1024.0 * 1024.0), requiredAllocSize, + requiredAllocSize / (1024.0 * 1024.0)); - if ( memSize < maxAllocSize ) { - log_info("Global memory size is less than max allocation size, using that.\n"); + if (memSize < maxAllocSize) + { + log_info("Global memory size is less than max allocation size, using " + "that.\n"); maxAllocSize = memSize; } - minSizeToTry = maxAllocSize/16; - while (maxAllocSize > (maxAllocSize/4)) { + minSizeToTry = maxAllocSize / 16; + while (maxAllocSize > (maxAllocSize / 4)) + { - log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); - memHdl = clCreateBuffer( context, CL_MEM_READ_ONLY, (size_t)maxAllocSize, NULL, &error ); - if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE || error == CL_OUT_OF_RESOURCES || error == CL_OUT_OF_HOST_MEMORY) { - log_info("\tAllocation failed at size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n", + maxAllocSize, (double)maxAllocSize / (1024.0 * 1024.0)); + memHdl = clCreateBuffer(context, CL_MEM_READ_ONLY, (size_t)maxAllocSize, + NULL, &error); + if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE + || error == CL_OUT_OF_RESOURCES || 
error == CL_OUT_OF_HOST_MEMORY) + { + log_info("\tAllocation failed at size of %lld bytes (%gMB).\n", + maxAllocSize, (double)maxAllocSize / (1024.0 * 1024.0)); maxAllocSize -= minSizeToTry; continue; } - test_error( error, "clCreateBuffer failed for maximum sized buffer."); + test_error(error, "clCreateBuffer failed for maximum sized buffer."); return 0; } - log_error("Failed to allocate even %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0)); + log_error("Failed to allocate even %lld bytes (%gMB).\n", maxAllocSize, + (double)maxAllocSize / (1024.0 * 1024.0)); return -1; } -int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; @@ -557,7 +676,7 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co size_t length; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) auto version = get_device_cl_version(deviceID); if (version == Version(1, 0)) @@ -571,16 +690,20 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co /* Just get any ol format to test with */ - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc ); - test_error( error, "Unable to obtain suitable image format to test with!" 
); + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_READ_WRITE, 0, &image_format_desc); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 2d width from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 2d width from device"); - if( maxDimension < minRequiredDimension ) + if (maxDimension < minRequiredDimension) { - log_error( "ERROR: Reported max image 2d width is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 2d width is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported width is %ld.\n", maxDimension); @@ -588,34 +711,42 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_co /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." 
); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size %d x 1 = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size %d x 1 = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &image_format_desc, + maxDimension, 1, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 2D creation failed for maximum width" ); + print_error(error, "Image 2D creation failed for maximum width"); return -1; } return 0; } -int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; @@ -625,7 +756,7 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c cl_uint minRequiredDimension; size_t length; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) auto version = get_device_cl_version(deviceID); if 
(version == Version(1, 0)) @@ -638,16 +769,20 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c } /* Just get any ol format to test with */ - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc ); - test_error( error, "Unable to obtain suitable image format to test with!" ); + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_READ_WRITE, 0, &image_format_desc); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 2d height from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 2d height from device"); - if( maxDimension < minRequiredDimension ) + if (maxDimension < minRequiredDimension) { - log_error( "ERROR: Reported max image 2d height is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 2d height is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported height is %ld.\n", maxDimension); @@ -655,56 +790,67 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_c /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x %d = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_2d(context, CL_MEM_READ_ONLY, &image_format_desc, + 1, maxDimension, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 2D creation failed for maximum height" ); + print_error(error, "Image 2D creation failed for maximum height"); return -1; } return 0; } -int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; clMemWrapper 
streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; - PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID) /* Just get any ol format to test with */ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_ONLY, 0, &image_format_desc); - test_error( error, "Unable to obtain suitable image format to test with!" ); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 3d width from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 3d width from device"); - if( maxDimension < 2048 ) + if (maxDimension < 2048) { - log_error( "ERROR: Reported max image 3d width is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 3d width is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported width is %ld.\n", maxDimension); @@ -712,56 +858,68 @@ int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_co /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*2*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 2 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 2 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n", + (int)maxDimension, + (2 * (float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 2, 0, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc, + maxDimension, 1, 2, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 3D creation failed for maximum width" ); + print_error(error, "Image 3D creation failed for maximum width"); return -1; } return 0; } -int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t 
maxDimension; clMemWrapper streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; - PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID) /* Just get any ol format to test with */ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_ONLY, 0, &image_format_desc); - test_error( error, "Unable to obtain suitable image format to test with!" ); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 3d height from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 3d height from device"); - if( maxDimension < 2048 ) + if (maxDimension < 2048) { - log_error( "ERROR: Reported max image 3d height is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 3d height is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported height is %ld.\n", maxDimension); @@ -769,27 +927,35 @@ int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_c /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*2*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 2 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 2 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n", + (int)maxDimension, + (2 * (float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 2, 0, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc, + 1, maxDimension, 2, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 3D creation failed for maximum height" ); + print_error(error, "Image 3D creation failed for maximum height"); return -1; } @@ -797,29 +963,33 @@ int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_c } -int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_3d_depth(cl_device_id 
deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimension; clMemWrapper streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; - PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_3D_IMAGE_SUPPORT(deviceID) /* Just get any ol format to test with */ error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D, CL_MEM_READ_ONLY, 0, &image_format_desc); - test_error( error, "Unable to obtain suitable image format to test with!" ); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max 2d image width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image 3d depth from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image 3d depth from device"); - if( maxDimension < 2048 ) + if (maxDimension < 2048) { - log_error( "ERROR: Reported max image 3d depth is less than required! (%d)\n", (int)maxDimension ); + log_error( + "ERROR: Reported max image 3d depth is less than required! (%d)\n", + (int)maxDimension); return -1; } log_info("Max reported depth is %ld.\n", maxDimension); @@ -827,55 +997,67 @@ int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_co /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. 
Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = create_image_3d(context, CL_MEM_READ_ONLY, &image_format_desc, + 1, 1, maxDimension, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Image 3D creation failed for maximum depth" ); + print_error(error, "Image 3D creation failed for maximum depth"); return -1; } return 0; } -int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t 
maxDimension; clMemWrapper streams[1]; - cl_image_format image_format_desc; + cl_image_format image_format_desc; cl_ulong maxAllocSize; size_t minRequiredDimension = gIsEmbedded ? 256 : 2048; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ); + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID); /* Just get any ol format to test with */ - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_READ_WRITE, 0, &image_format_desc ); - test_error( error, "Unable to obtain suitable image format to test with!" ); + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE2D_ARRAY, + CL_MEM_READ_WRITE, 0, &image_format_desc); + test_error(error, "Unable to obtain suitable image format to test with!"); /* Get the max image array width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxDimension ), &maxDimension, NULL ); - test_error( error, "Unable to get max image array size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, + sizeof(maxDimension), &maxDimension, NULL); + test_error(error, "Unable to get max image array size from device"); - if( maxDimension < minRequiredDimension ) + if (maxDimension < minRequiredDimension) { - log_error( "ERROR: Reported max image array size is less than required! (%d)\n", (int)maxDimension ); + log_error("ERROR: Reported max image array size is less than required! 
" + "(%d)\n", + (int)maxDimension); return -1; } log_info("Max reported image array size is %ld.\n", maxDimension); @@ -883,96 +1065,127 @@ int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_ /* Verify we can use the format */ image_format_desc.image_channel_data_type = CL_UNORM_INT8; image_format_desc.image_channel_order = CL_RGBA; - if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D_ARRAY, &image_format_desc)) { + if (!is_image_format_supported(context, CL_MEM_READ_ONLY, + CL_MEM_OBJECT_IMAGE2D_ARRAY, + &image_format_desc)) + { log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test."); return -1; } /* Verify that we can actually allocate an image that large */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." ); - if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) { - log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n", - (cl_ulong)maxDimension*1*4, maxAllocSize); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); + if ((cl_ulong)maxDimension * 1 * 4 > maxAllocSize) + { + log_error("Can not allocate a large enough image (min size: %lld " + "bytes, max allowed: %lld bytes) to test.\n", + (cl_ulong)maxDimension * 1 * 4, maxAllocSize); return -1; } - log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0)); + log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", + (int)maxDimension, ((float)maxDimension * 4 / 1024.0 / 1024.0)); /* Try to allocate a very big image */ - streams[0] = create_image_2d_array( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error ); - if( ( 
streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = + create_image_2d_array(context, CL_MEM_READ_ONLY, &image_format_desc, 1, + 1, maxDimension, 0, 0, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "2D Image Array creation failed for maximum array size" ); + print_error(error, + "2D Image Array creation failed for maximum array size"); return -1; } return 0; } -int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; size_t maxDimensionPixels; clMemWrapper streams[2]; - cl_image_format image_format_desc = {0}; + cl_image_format image_format_desc = { 0 }; cl_ulong maxAllocSize; size_t minRequiredDimension = gIsEmbedded ? 2048 : 65536; unsigned int i = 0; size_t pixelBytes = 0; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ); + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID); /* Get the max memory allocation size */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL ); - test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." 
); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE."); /* Get the max image array width */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof( maxDimensionPixels ), &maxDimensionPixels, NULL ); - test_error( error, "Unable to get max image buffer size from device" ); + error = + clGetDeviceInfo(deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, + sizeof(maxDimensionPixels), &maxDimensionPixels, NULL); + test_error(error, "Unable to get max image buffer size from device"); - if( maxDimensionPixels < minRequiredDimension ) + if (maxDimensionPixels < minRequiredDimension) { - log_error( "ERROR: Reported max image buffer size is less than required! (%d)\n", (int)maxDimensionPixels ); + log_error("ERROR: Reported max image buffer size is less than " + "required! (%d)\n", + (int)maxDimensionPixels); return -1; } - log_info("Max reported image buffer size is %ld pixels.\n", maxDimensionPixels); + log_info("Max reported image buffer size is %ld pixels.\n", + maxDimensionPixels); pixelBytes = maxAllocSize / maxDimensionPixels; - if ( pixelBytes == 0 ) + if (pixelBytes == 0) { - log_error( "Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image of maximum size!\n" ); + log_error("Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than " + "CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image " + "of maximum size!\n"); return -1; } error = -1; - for ( i = pixelBytes; i > 0; --i ) + for (i = pixelBytes; i > 0; --i) { - error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE1D, CL_MEM_READ_ONLY, i, &image_format_desc ); - if ( error == CL_SUCCESS ) + error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE1D, + CL_MEM_READ_ONLY, i, &image_format_desc); + if (error == CL_SUCCESS) { pixelBytes = i; break; } } - test_error( error, "Device does not support format 
to be used to allocate image of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n" ); + test_error(error, + "Device does not support format to be used to allocate image of " + "CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n"); - log_info("Attempting to create an 1D image with channel order %s from buffer of size %d = %gMB.\n", - GetChannelOrderName( image_format_desc.image_channel_order ), (int)maxDimensionPixels, ((float)maxDimensionPixels*pixelBytes/1024.0/1024.0)); + log_info("Attempting to create an 1D image with channel order %s from " + "buffer of size %d = %gMB.\n", + GetChannelOrderName(image_format_desc.image_channel_order), + (int)maxDimensionPixels, + ((float)maxDimensionPixels * pixelBytes / 1024.0 / 1024.0)); /* Try to allocate a buffer */ - streams[0] = clCreateBuffer( context, CL_MEM_READ_ONLY, maxDimensionPixels*pixelBytes, NULL, &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) + streams[0] = clCreateBuffer(context, CL_MEM_READ_ONLY, + maxDimensionPixels * pixelBytes, NULL, &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) { - print_error( error, "Buffer creation failed for maximum image buffer size" ); + print_error(error, + "Buffer creation failed for maximum image buffer size"); return -1; } /* Try to allocate a 1D image array from buffer */ - streams[1] = create_image_1d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimensionPixels, 0, NULL, streams[0], &error ); - if( ( streams[0] == NULL ) || ( error != CL_SUCCESS )) - { - print_error( error, "1D Image from buffer creation failed for maximum image buffer size" ); + streams[1] = + create_image_1d(context, CL_MEM_READ_ONLY, &image_format_desc, + maxDimensionPixels, 0, NULL, streams[0], &error); + if ((streams[0] == NULL) || (error != CL_SUCCESS)) + { + print_error(error, + "1D Image from buffer creation failed for maximum image " + "buffer size"); return -1; } @@ -980,8 +1193,8 @@ int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl } - -int 
test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error, retVal, i; size_t maxSize; @@ -1000,62 +1213,78 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co /* Get the max param size */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxSize ), &maxSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxSize), &maxSize, NULL); + test_error(error, "Unable to get max parameter size from device"); - if( ((!gIsEmbedded) && (maxSize < 1024)) || ((gIsEmbedded) && (maxSize < 256)) ) + if (((!gIsEmbedded) && (maxSize < 1024)) + || ((gIsEmbedded) && (maxSize < 256))) { - log_error( "ERROR: Reported max parameter size is less than required! (%d)\n", (int)maxSize ); + log_error( + "ERROR: Reported max parameter size is less than required! 
(%d)\n", + (int)maxSize); return -1; } /* The embedded profile without cles_khr_int64 extension does not require * longs, so use ints */ if (embeddedNoLong) - numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_int); + numberOfIntParametersToTry = numberExpected = + (maxSize - sizeof(cl_mem)) / sizeof(cl_int); else - numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_long); + numberOfIntParametersToTry = numberExpected = + (maxSize - sizeof(cl_mem)) / sizeof(cl_long); - decrement = (size_t)(numberOfIntParametersToTry/8); - if (decrement < 1) - decrement = 1; + decrement = (size_t)(numberOfIntParametersToTry / 8); + if (decrement < 1) decrement = 1; log_info("Reported max parameter size of %d bytes.\n", (int)maxSize); - while (numberOfIntParametersToTry > 0) { - // These need to be inside to be deallocated automatically on each loop iteration. + while (numberOfIntParametersToTry > 0) + { + // These need to be inside to be deallocated automatically on each loop + // iteration. 
clProgramWrapper program; clMemWrapper mem; clKernelWrapper kernel; if (embeddedNoLong) { - log_info("Trying a kernel with %ld int arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n", - numberOfIntParametersToTry, sizeof(cl_int)*numberOfIntParametersToTry, sizeof(cl_mem), - sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int)); + log_info( + "Trying a kernel with %ld int arguments (%ld bytes) and one " + "cl_mem (%ld bytes) for %ld bytes total.\n", + numberOfIntParametersToTry, + sizeof(cl_int) * numberOfIntParametersToTry, sizeof(cl_mem), + sizeof(cl_mem) + numberOfIntParametersToTry * sizeof(cl_int)); } else { - log_info("Trying a kernel with %ld long arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n", - numberOfIntParametersToTry, sizeof(cl_long)*numberOfIntParametersToTry, sizeof(cl_mem), - sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long)); + log_info( + "Trying a kernel with %ld long arguments (%ld bytes) and one " + "cl_mem (%ld bytes) for %ld bytes total.\n", + numberOfIntParametersToTry, + sizeof(cl_long) * numberOfIntParametersToTry, sizeof(cl_mem), + sizeof(cl_mem) + numberOfIntParametersToTry * sizeof(cl_long)); } // Allocate memory for the program storage - data = malloc(sizeof(cl_long)*numberOfIntParametersToTry); - - argumentLine = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32); - codeLines = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32); - programSrc = (char*)malloc(sizeof(char)*(numberOfIntParametersToTry*64+1024)); + data = malloc(sizeof(cl_long) * numberOfIntParametersToTry); + + argumentLine = + (char *)malloc(sizeof(char) * numberOfIntParametersToTry * 32); + codeLines = + (char *)malloc(sizeof(char) * numberOfIntParametersToTry * 32); + programSrc = (char *)malloc(sizeof(char) + * (numberOfIntParametersToTry * 64 + 1024)); argumentLine[0] = '\0'; codeLines[0] = '\0'; programSrc[0] = '\0'; // Generate our results expectedResult = 0; - for (i=0; 
i<(int)numberOfIntParametersToTry; i++) - { - if( gHasLong ) + for (i = 0; i < (int)numberOfIntParametersToTry; i++) + { + if (gHasLong) { ((cl_long *)data)[i] = i; expectedResult += i; @@ -1068,30 +1297,35 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co } // Build the program - if( gHasLong) + if (gHasLong) sprintf(argumentLine, "%s", "long arg0"); else sprintf(argumentLine, "%s", "int arg0"); sprintf(codeLines, "%s", "result[0] += arg0;"); - for (i=1; i<(int)numberOfIntParametersToTry; i++) + for (i = 1; i < (int)numberOfIntParametersToTry; i++) { - if( gHasLong) - sprintf(argumentLine + strlen( argumentLine), ", long arg%d", i); + if (gHasLong) + sprintf(argumentLine + strlen(argumentLine), ", long arg%d", i); else - sprintf(argumentLine + strlen( argumentLine), ", int arg%d", i); + sprintf(argumentLine + strlen(argumentLine), ", int arg%d", i); - sprintf(codeLines + strlen( codeLines), "\nresult[0] += arg%d;", i); + sprintf(codeLines + strlen(codeLines), "\nresult[0] += arg%d;", i); } /* Create a kernel to test with */ - sprintf( programSrc, gHasLong ? sample_large_parmam_kernel_pattern[0]: - sample_large_int_parmam_kernel_pattern[0], argumentLine, codeLines); + sprintf(programSrc, + gHasLong ? 
sample_large_parmam_kernel_pattern[0] + : sample_large_int_parmam_kernel_pattern[0], + argumentLine, codeLines); ptr = programSrc; - if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&ptr, "sample_test") + != 0) { - log_info("Create program failed, decrementing number of parameters to try.\n"); + log_info("Create program failed, decrementing number of parameters " + "to try.\n"); numberOfIntParametersToTry -= decrement; continue; } @@ -1103,88 +1337,119 @@ int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_co &error); test_error(error, "clCreateBuffer failed"); - for (i=0; i<(int)numberOfIntParametersToTry; i++) { - if(gHasLong) - error = clSetKernelArg(kernel, i, sizeof(cl_long), &(((cl_long*)data)[i])); + for (i = 0; i < (int)numberOfIntParametersToTry; i++) + { + if (gHasLong) + error = clSetKernelArg(kernel, i, sizeof(cl_long), + &(((cl_long *)data)[i])); else - error = clSetKernelArg(kernel, i, sizeof(cl_int), &(((cl_int*)data)[i])); + error = clSetKernelArg(kernel, i, sizeof(cl_int), + &(((cl_int *)data)[i])); - if (error != CL_SUCCESS) { - log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + if (error != CL_SUCCESS) + { + log_info("clSetKernelArg failed (%s), decrementing number of " + "parameters to try.\n", + IGetErrorString(error)); numberOfIntParametersToTry -= decrement; break; } } - if (error != CL_SUCCESS) - continue; + if (error != CL_SUCCESS) continue; error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mem); - if (error != CL_SUCCESS) { - log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + if (error != CL_SUCCESS) + { + log_info("clSetKernelArg failed (%s), decrementing number of " + "parameters to try.\n", + IGetErrorString(error)); numberOfIntParametersToTry -= 
decrement; continue; } - size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1}; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event); - if (error != CL_SUCCESS) { - log_info( "clEnqueueNDRangeKernel failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error)); + size_t globalDim[3] = { 1, 1, 1 }, localDim[3] = { 1, 1, 1 }; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, + localDim, 0, NULL, &event); + if (error != CL_SUCCESS) + { + log_info("clEnqueueNDRangeKernel failed (%s), decrementing number " + "of parameters to try.\n", + IGetErrorString(error)); numberOfIntParametersToTry -= decrement; continue; } // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error( + error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); - if(gHasLong) - error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long), &long_result, 0, NULL, NULL); + if (gHasLong) + error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long), + &long_result, 0, NULL, NULL); else - error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int), &int_result, 0, NULL, NULL); + error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int), + &int_result, 0, NULL, NULL); test_error(error, "clEnqueueReadBuffer failed") - free(data); + free(data); free(argumentLine); free(codeLines); free(programSrc); - if(gHasLong) + if (gHasLong) { - 
if (long_result != expectedResult) { - log_error("Expected result (%lld) does not equal actual result (%lld).\n", expectedResult, long_result); + if (long_result != expectedResult) + { + log_error("Expected result (%lld) does not equal actual result " + "(%lld).\n", + expectedResult, long_result); numberOfIntParametersToTry -= decrement; continue; - } else { - log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long)); + } + else + { + log_info("Results verified at %ld bytes of arguments.\n", + sizeof(cl_mem) + + numberOfIntParametersToTry * sizeof(cl_long)); break; } } else { - if (int_result != expectedResult) { - log_error("Expected result (%lld) does not equal actual result (%d).\n", expectedResult, int_result); + if (int_result != expectedResult) + { + log_error("Expected result (%lld) does not equal actual result " + "(%d).\n", + expectedResult, int_result); numberOfIntParametersToTry -= decrement; continue; - } else { - log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int)); + } + else + { + log_info("Results verified at %ld bytes of arguments.\n", + sizeof(cl_mem) + + numberOfIntParametersToTry * sizeof(cl_int)); break; } } } - if (numberOfIntParametersToTry == (long)numberExpected) - return 0; + if (numberOfIntParametersToTry == (long)numberExpected) return 0; return -1; } -int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_samplers(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_uint maxSamplers, i; @@ -1197,104 +1462,124 @@ int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_ cl_uint minRequiredSamplers = gIsEmbedded ? 
8 : 16; - PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID ) + PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) /* Get the max value */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_SAMPLERS, sizeof( maxSamplers ), &maxSamplers, NULL ); - test_error( error, "Unable to get max sampler count from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_SAMPLERS, + sizeof(maxSamplers), &maxSamplers, NULL); + test_error(error, "Unable to get max sampler count from device"); - if( maxSamplers < minRequiredSamplers ) + if (maxSamplers < minRequiredSamplers) { - log_error( "ERROR: Reported max sampler count is less than required! (%d)\n", (int)maxSamplers ); + log_error( + "ERROR: Reported max sampler count is less than required! (%d)\n", + (int)maxSamplers); return -1; } log_info("Reported max %d samplers.\n", maxSamplers); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxParameterSize), &maxParameterSize, NULL); + test_error(error, "Unable to get max parameter size from device"); // Subtract the size of the result - maxParameterSize -= 2*sizeof(cl_mem); + maxParameterSize -= 2 * sizeof(cl_mem); // Calculate the number we can use - if (maxParameterSize/sizeof(cl_sampler) < maxSamplers) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max sampler arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_sampler))); - maxSamplers = (unsigned int)(maxParameterSize/sizeof(cl_sampler)); + if (maxParameterSize / sizeof(cl_sampler) < maxSamplers) + { + log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max sampler arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / sizeof(cl_sampler))); + maxSamplers = (unsigned int)(maxParameterSize / sizeof(cl_sampler)); } /* Create a kernel to test with */ - 
programSrc = (char *)malloc( ( strlen( sample_sampler_kernel_pattern[ 1 ] ) + 8 ) * ( maxSamplers ) + - strlen( sample_sampler_kernel_pattern[ 0 ] ) + strlen( sample_sampler_kernel_pattern[ 2 ] ) + - ( strlen( sample_sampler_kernel_pattern[ 3 ] ) + 8 ) * maxSamplers + - strlen( sample_sampler_kernel_pattern[ 4 ] ) ); - strcpy( programSrc, sample_sampler_kernel_pattern[ 0 ] ); - for( i = 0; i < maxSamplers; i++ ) + programSrc = (char *)malloc( + (strlen(sample_sampler_kernel_pattern[1]) + 8) * (maxSamplers) + + strlen(sample_sampler_kernel_pattern[0]) + + strlen(sample_sampler_kernel_pattern[2]) + + (strlen(sample_sampler_kernel_pattern[3]) + 8) * maxSamplers + + strlen(sample_sampler_kernel_pattern[4])); + strcpy(programSrc, sample_sampler_kernel_pattern[0]); + for (i = 0; i < maxSamplers; i++) { - sprintf( samplerLine, sample_sampler_kernel_pattern[ 1 ], i ); - strcat( programSrc, samplerLine ); + sprintf(samplerLine, sample_sampler_kernel_pattern[1], i); + strcat(programSrc, samplerLine); } - strcat( programSrc, sample_sampler_kernel_pattern[ 2 ] ); - for( i = 0; i < maxSamplers; i++ ) + strcat(programSrc, sample_sampler_kernel_pattern[2]); + for (i = 0; i < maxSamplers; i++) { - sprintf( samplerLine, sample_sampler_kernel_pattern[ 3 ], i ); - strcat( programSrc, samplerLine ); + sprintf(samplerLine, sample_sampler_kernel_pattern[3], i); + strcat(programSrc, samplerLine); } - strcat( programSrc, sample_sampler_kernel_pattern[ 4 ] ); + strcat(programSrc, sample_sampler_kernel_pattern[4]); - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test"); - test_error( error, "Failed to create the program and kernel."); + error = + create_single_kernel_helper(context, &program, &kernel, 1, + (const char **)&programSrc, "sample_test"); + test_error(error, "Failed to create the program and kernel."); // We have to set up some fake parameters so it'll work clSamplerWrapper *samplers = new clSamplerWrapper[maxSamplers]; 
cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - clMemWrapper image = create_image_2d( context, CL_MEM_READ_WRITE, &format, 16, 16, 0, NULL, &error ); - test_error( error, "Unable to create a test image" ); + clMemWrapper image = create_image_2d(context, CL_MEM_READ_WRITE, &format, + 16, 16, 0, NULL, &error); + test_error(error, "Unable to create a test image"); clMemWrapper stream = clCreateBuffer(context, CL_MEM_READ_WRITE, 16, NULL, &error); - test_error( error, "Unable to create test buffer" ); + test_error(error, "Unable to create test buffer"); - error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &image ); - error |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), &stream ); - test_error( error, "Unable to set kernel arguments" ); - for( i = 0; i < maxSamplers; i++ ) + error = clSetKernelArg(kernel, 0, sizeof(cl_mem), &image); + error |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &stream); + test_error(error, "Unable to set kernel arguments"); + for (i = 0; i < maxSamplers; i++) { - samplers[ i ] = clCreateSampler( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error ); - test_error( error, "Unable to create sampler" ); + samplers[i] = clCreateSampler(context, CL_FALSE, CL_ADDRESS_NONE, + CL_FILTER_NEAREST, &error); + test_error(error, "Unable to create sampler"); - error = clSetKernelArg( kernel, 2 + i, sizeof( cl_sampler ), &samplers[ i ] ); - test_error( error, "Unable to set sampler argument" ); + error = clSetKernelArg(kernel, 2 + i, sizeof(cl_sampler), &samplers[i]); + test_error(error, "Unable to set sampler argument"); } - size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1}; - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event); - test_error(error, "clEnqueueNDRangeKernel failed with maximum number of samplers."); + size_t globalDim[3] = { 1, 1, 1 }, localDim[3] = { 1, 1, 1 }; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, + 0, NULL, &event); + test_error( + error, + 
"clEnqueueNDRangeKernel failed with maximum number of samplers."); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); - free( programSrc ); + free(programSrc); delete[] samplers; return 0; } #define PASSING_FRACTION 4 -int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; clProgramWrapper program; clKernelWrapper kernel; - size_t threads[1], localThreads[1]; + size_t threads[1], localThreads[1]; cl_int *constantData, *resultData; cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize; int i; @@ -1303,48 +1588,56 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, MTdata d; /* Verify our test buffer won't be bigger than allowed */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( error, "Unable to get max constant buffer size" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + sizeof(maxSize), &maxSize, 0); + test_error(error, "Unable to get max constant buffer size"); - if( ( 0 == gIsEmbedded && maxSize < 64L * 1024L ) || maxSize < 1L * 1024L ) + if ((0 == gIsEmbedded && maxSize < 64L * 
1024L) || maxSize < 1L * 1024L) { - log_error( "ERROR: Reported max constant buffer size less than required by OpenCL 1.0 (reported %d KB)\n", (int)( maxSize / 1024L ) ); + log_error("ERROR: Reported max constant buffer size less than required " + "by OpenCL 1.0 (reported %d KB)\n", + (int)(maxSize / 1024L)); return -1; } log_info("Reported max constant buffer size of %lld bytes.\n", maxSize); // Limit test buffer size to 1/8 of CL_DEVICE_GLOBAL_MEM_SIZE - error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0); + error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, + sizeof(maxGlobalSize), &maxGlobalSize, 0); test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE"); - if (maxSize > maxGlobalSize / 8) - maxSize = maxGlobalSize / 8; + if (maxSize > maxGlobalSize / 8) maxSize = maxGlobalSize / 8; - error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, 0); test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE "); - - if (maxSize > maxAllocSize) - maxSize = maxAllocSize; - + + if (maxSize > maxAllocSize) maxSize = maxAllocSize; + /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_arg_kernel, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + sample_const_arg_kernel, "sample_test") + != 0) { return -1; } /* Try the returned max size and decrease it until we get one that works. 
*/ - stepSize = maxSize/16; + stepSize = maxSize / 16; currentSize = maxSize; int allocPassed = 0; - d = init_genrand( gRandomSeed ); - while (!allocPassed && currentSize >= maxSize/PASSING_FRACTION) { - log_info("Attempting to allocate constant buffer of size %lld bytes\n", maxSize); + d = init_genrand(gRandomSeed); + while (!allocPassed && currentSize >= maxSize / PASSING_FRACTION) + { + log_info("Attempting to allocate constant buffer of size %lld bytes\n", + maxSize); /* Create some I/O streams */ - size_t sizeToAllocate = ((size_t)currentSize/sizeof( cl_int ))*sizeof(cl_int); - size_t numberOfInts = sizeToAllocate/sizeof(cl_int); - constantData = (cl_int *)malloc( sizeToAllocate); + size_t sizeToAllocate = + ((size_t)currentSize / sizeof(cl_int)) * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); + constantData = (cl_int *)malloc(sizeToAllocate); if (constantData == NULL) { log_error("Failed to allocate memory for constantData!\n"); @@ -1352,53 +1645,74 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, return EXIT_FAILURE; } - for(i=0; i<(int)(numberOfInts); i++) + for (i = 0; i < (int)(numberOfInts); i++) constantData[i] = (int)genrand_int32(d); clMemWrapper streams[3]; streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, constantData, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); /* Set the arguments */ - error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]); - test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), 
&streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); /* Test running the kernel and verifying it */ threads[0] = numberOfInts; localThreads[0] = 1; - log_info("Filling constant buffer with %d cl_ints (%d bytes).\n", (int)threads[0], (int)(threads[0]*sizeof(cl_int))); - - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event ); - /* If we failed due to a resource issue, reduce the size and try again. */ - if ((error == CL_OUT_OF_RESOURCES) || (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (error == CL_OUT_OF_HOST_MEMORY)) { - log_info("Kernel enqueue failed at size %lld, trying at a reduced size.\n", currentSize); + log_info("Filling constant buffer with %d cl_ints (%d bytes).\n", + (int)threads[0], (int)(threads[0] * sizeof(cl_int))); + + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event); + /* If we failed due to a resource issue, reduce the size and try again. 
+ */ + if ((error == CL_OUT_OF_RESOURCES) + || (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) + || (error == CL_OUT_OF_HOST_MEMORY)) + { + log_info("Kernel enqueue failed at size %lld, trying at a reduced " + "size.\n", + currentSize); currentSize -= stepSize; free(constantData); continue; } - test_error( error, "clEnqueueNDRangeKernel with maximum constant buffer size failed."); + test_error( + error, + "clEnqueueNDRangeKernel with maximum constant buffer size failed."); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error( + error, + "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); - if (event_status < 0) { - if ((event_status == CL_OUT_OF_RESOURCES) || (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (event_status == CL_OUT_OF_HOST_MEMORY)) { - log_info("Kernel event indicates failure at size %lld, trying at a reduced size.\n", currentSize); + if (event_status < 0) + { + if ((event_status == CL_OUT_OF_RESOURCES) + || (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE) + || (event_status == CL_OUT_OF_HOST_MEMORY)) + { + log_info("Kernel event indicates failure at size %lld, trying " + "at a reduced size.\n", + currentSize); currentSize -= stepSize; free(constantData); continue; - } else { + } + else + { test_error(error, "Kernel execution event returned error"); } } @@ -1415,30 +1729,41 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, return EXIT_FAILURE; } - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, 
resultData, 0, NULL, NULL); - test_error( error, "clEnqueueReadBuffer failed"); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, + sizeToAllocate, resultData, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); - for(i=0; i<(int)(numberOfInts); i++) - if (constantData[i] != resultData[i]) { - log_error("Data failed to verify: constantData[%d]=%d != resultData[%d]=%d\n", + for (i = 0; i < (int)(numberOfInts); i++) + if (constantData[i] != resultData[i]) + { + log_error("Data failed to verify: constantData[%d]=%d != " + "resultData[%d]=%d\n", i, constantData[i], i, resultData[i]); - free( constantData ); + free(constantData); free(resultData); - free_mtdata(d); d = NULL; + free_mtdata(d); + d = NULL; return -1; } - free( constantData ); + free(constantData); free(resultData); } - free_mtdata(d); d = NULL; + free_mtdata(d); + d = NULL; - if (allocPassed) { - if (currentSize < maxSize/PASSING_FRACTION) { - log_error("Failed to allocate at least 1/8 of the reported constant size.\n"); + if (allocPassed) + { + if (currentSize < maxSize / PASSING_FRACTION) + { + log_error("Failed to allocate at least 1/8 of the reported " + "constant size.\n"); return -1; - } else if (currentSize != maxSize) { - log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize); + } + else if (currentSize != maxSize) + { + log_info("Passed at reduced size. 
(%lld of %lld bytes)\n", + currentSize, maxSize); return 0; } return 0; @@ -1446,13 +1771,14 @@ int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, return -1; } -int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_constant_args(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper *streams; - size_t threads[1], localThreads[1]; + clMemWrapper *streams; + size_t threads[1], localThreads[1]; cl_uint i, maxArgs; cl_ulong maxSize; cl_ulong maxParameterSize; @@ -1465,119 +1791,145 @@ int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_com /* Verify our test buffer won't be bigger than allowed */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof( maxArgs ), &maxArgs, 0 ); - test_error( error, "Unable to get max constant arg count" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_ARGS, + sizeof(maxArgs), &maxArgs, 0); + test_error(error, "Unable to get max constant arg count"); - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL ); - test_error( error, "Unable to get max parameter size from device" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, + sizeof(maxParameterSize), &maxParameterSize, NULL); + test_error(error, "Unable to get max parameter size from device"); // Subtract the size of the result maxParameterSize -= sizeof(cl_mem); // Calculate the number we can use - if (maxParameterSize/sizeof(cl_mem) < maxArgs) { - log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem))); - maxArgs = (unsigned int)(maxParameterSize/sizeof(cl_mem)); + if (maxParameterSize / sizeof(cl_mem) < maxArgs) + { + 
log_info("WARNING: Max parameter size of %d bytes limits test to %d " + "max image arguments.\n", + (int)maxParameterSize, + (int)(maxParameterSize / sizeof(cl_mem))); + maxArgs = (unsigned int)(maxParameterSize / sizeof(cl_mem)); } - if( maxArgs < (gIsEmbedded ? 4 : 8) ) + if (maxArgs < (gIsEmbedded ? 4 : 8)) { - log_error( "ERROR: Reported max constant arg count less than required by OpenCL 1.0 (reported %d)\n", (int)maxArgs ); + log_error("ERROR: Reported max constant arg count less than required " + "by OpenCL 1.0 (reported %d)\n", + (int)maxArgs); return -1; } - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( error, "Unable to get max constant buffer size" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, + sizeof(maxSize), &maxSize, 0); + test_error(error, "Unable to get max constant buffer size"); individualBufferSize = (maxSize / 2) / maxArgs; - log_info("Reported max constant arg count of %d and max constant buffer size of %d. Test will attempt to allocate half of that, or %d buffers of size %d.\n", - (int)maxArgs, (int)maxSize, (int)maxArgs, (int)individualBufferSize); + log_info( + "Reported max constant arg count of %u and max constant buffer " + "size of %llu. 
Test will attempt to allocate half of that, or %llu " + "buffers of size %zu.\n", + maxArgs, maxSize, maxArgs, individualBufferSize); - str2 = (char*)malloc(sizeof(char)*32*(maxArgs+2)); - constArgs = (char*)malloc(sizeof(char)*32*(maxArgs+2)); - programSrc = (char*)malloc(sizeof(char)*32*2*(maxArgs+2)+1024); + str2 = (char *)malloc(sizeof(char) * 32 * (maxArgs + 2)); + constArgs = (char *)malloc(sizeof(char) * 32 * (maxArgs + 2)); + programSrc = (char *)malloc(sizeof(char) * 32 * 2 * (maxArgs + 2) + 1024); /* Create a test program */ constArgs[0] = 0; str2[0] = 0; - for( i = 0; i < maxArgs-1; i++ ) - { - sprintf( str, ", __constant int *src%d", (int)( i + 2 ) ); - strcat( constArgs, str ); - sprintf( str2 + strlen( str2), "\tdst[tid] += src%d[tid];\n", (int)(i+2)); - if (strlen(str2) > (sizeof(char)*32*(maxArgs+2)-32) || strlen(constArgs) > (sizeof(char)*32*(maxArgs+2)-32)) { - log_info("Limiting number of arguments tested to %d due to test program allocation size.\n", i); + for (i = 0; i < maxArgs - 1; i++) + { + sprintf(str, ", __constant int *src%d", (int)(i + 2)); + strcat(constArgs, str); + sprintf(str2 + strlen(str2), "\tdst[tid] += src%d[tid];\n", + (int)(i + 2)); + if (strlen(str2) > (sizeof(char) * 32 * (maxArgs + 2) - 32) + || strlen(constArgs) > (sizeof(char) * 32 * (maxArgs + 2) - 32)) + { + log_info("Limiting number of arguments tested to %d due to test " + "program allocation size.\n", + i); break; } } - sprintf( programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2 ); + sprintf(programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2); /* Create a kernel to test with */ ptr = programSrc; - if( create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, &ptr, + "sample_test") + != 0) { return -1; } /* Create some I/O streams */ - streams = new clMemWrapper[ maxArgs + 1 ]; - for( i = 0; i < maxArgs + 1; i++ ) + streams = new 
clMemWrapper[maxArgs + 1]; + for (i = 0; i < maxArgs + 1; i++) { streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, individualBufferSize, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); } /* Set the arguments */ - for( i = 0; i < maxArgs + 1; i++ ) + for (i = 0; i < maxArgs + 1; i++) { - error = clSetKernelArg(kernel, i, sizeof( streams[i] ), &streams[i]); - test_error( error, "Unable to set kernel argument" ); + error = clSetKernelArg(kernel, i, sizeof(streams[i]), &streams[i]); + test_error(error, "Unable to set kernel argument"); } /* Test running the kernel and verifying it */ threads[0] = (size_t)10; - while (threads[0]*sizeof(cl_int) > individualBufferSize) - threads[0]--; + while (threads[0] * sizeof(cl_int) > individualBufferSize) threads[0]--; - error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); - test_error( error, "Unable to get work group size to use" ); + error = get_max_common_work_group_size(context, kernel, threads[0], + &localThreads[0]); + test_error(error, "Unable to get work group size to use"); - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event ); - test_error( error, "clEnqueueNDRangeKernel failed"); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event); + test_error(error, "clEnqueueNDRangeKernel failed"); // Verify that the event does not return an error from the execution error = clWaitForEvents(1, &event); - test_error( error, "clWaitForEvent failed"); - error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL); - test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); + test_error(error, "clWaitForEvent failed"); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(event_status), &event_status, NULL); + test_error(error, + "clGetEventInfo 
for CL_EVENT_COMMAND_EXECUTION_STATUS failed"); clReleaseEvent(event); if (event_status < 0) test_error(error, "Kernel execution event returned error"); error = clFinish(queue); - test_error( error, "clFinish failed."); + test_error(error, "clFinish failed."); - delete [] streams; + delete[] streams; free(str2); free(constArgs); free(programSrc); return 0; } -int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_compute_units(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_uint value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get compute unit count" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, + sizeof(value), &value, 0); + test_error(error, "Unable to get compute unit count"); - if( value < 1 ) + if (value < 1) { - log_error( "ERROR: Reported compute unit count less than required by OpenCL 1.0 (reported %d)\n", (int)value ); + log_error("ERROR: Reported compute unit count less than required by " + "OpenCL 1.0 (reported %d)\n", + (int)value); return -1; } @@ -1586,18 +1938,22 @@ int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_com return 0; } -int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_address_bits(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_uint value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_ADDRESS_BITS, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get address bit count" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS, sizeof(value), + &value, 0); + test_error(error, "Unable to get address bit count"); - if( value != 32 && value != 64 ) + if (value != 32 && value != 64) { - log_error( "ERROR: Reported address bit 
count not valid by OpenCL 1.0 (reported %d)\n", (int)value ); + log_error("ERROR: Reported address bit count not valid by OpenCL 1.0 " + "(reported %d)\n", + (int)value); return -1; } @@ -1606,68 +1962,84 @@ int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_comm return 0; } -int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_device_fp_config value; char profile[128] = ""; - error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get device single fp config" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(value), + &value, 0); + test_error(error, "Unable to get device single fp config"); - //Check to see if we are an embedded profile device - if((error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ))) + // Check to see if we are an embedded profile device + if ((error = clGetDeviceInfo(deviceID, CL_DEVICE_PROFILE, sizeof(profile), + profile, NULL))) { - log_error( "FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n", error ); + log_error("FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n", + error); return error; } - if( 0 == strcmp( profile, "EMBEDDED_PROFILE" )) + if (0 == strcmp(profile, "EMBEDDED_PROFILE")) { // embedded device - if( 0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO))) + if (0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO))) { - log_error( "FAILURE: embedded device supports neither CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n" ); + log_error("FAILURE: embedded device supports neither " + "CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n"); return -1; } } else { // Full profile - if( ( value & ( CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN )) != ( CL_FP_ROUND_TO_NEAREST 
| CL_FP_INF_NAN ) ) + if ((value & (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN)) + != (CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN)) { - log_error( "ERROR: Reported single fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported single fp config doesn't meet minimum " + "set by OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } } return 0; } -int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_device_fp_config value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get device double fp config" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(value), + &value, 0); + test_error(error, "Unable to get device double fp config"); - if (value == 0) - return 0; + if (value == 0) return 0; - if( ( value & (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) != ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM) ) + if ((value + & (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO + | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) + != (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO + | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) { - log_error( "ERROR: Reported double fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported double fp config doesn't meet minimum set " + "by OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } return 0; } -int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_local_mem_size(cl_device_id 
deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; clProgramWrapper program; clKernelWrapper kernel; - clMemWrapper streams[3]; - size_t threads[1], localThreads[1]; + clMemWrapper streams[3]; + size_t threads[1], localThreads[1]; cl_int *localData, *resultData; cl_ulong maxSize, kernelLocalUsage, min_max_local_mem_size; Version device_version; @@ -1676,8 +2048,9 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co MTdata d; /* Verify our test buffer won't be bigger than allowed */ - error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( maxSize ), &maxSize, 0 ); - test_error( error, "Unable to get max local buffer size" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(maxSize), + &maxSize, 0); + test_error(error, "Unable to get max local buffer size"); try { @@ -1709,65 +2082,80 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co return -1; } - log_info("Reported max local buffer size for device: %lld bytes.\n", maxSize); + log_info("Reported max local buffer size for device: %lld bytes.\n", + maxSize); /* Create a kernel to test with */ - if( create_single_kernel_helper( context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ) != 0 ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + sample_local_arg_kernel, "sample_test") + != 0) { return -1; } - error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernelLocalUsage), &kernelLocalUsage, NULL); - test_error(error, "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); + error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(kernelLocalUsage), + &kernelLocalUsage, NULL); + test_error(error, + "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed"); - log_info("Reported local buffer usage for kernel (CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n", kernelLocalUsage); + 
log_info("Reported local buffer usage for kernel " + "(CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n", + kernelLocalUsage); /* Create some I/O streams */ - size_t sizeToAllocate = ((size_t)(maxSize-kernelLocalUsage)/sizeof( cl_int ))*sizeof(cl_int); - size_t numberOfInts = sizeToAllocate/sizeof(cl_int); + size_t sizeToAllocate = + ((size_t)(maxSize - kernelLocalUsage) / sizeof(cl_int)) + * sizeof(cl_int); + size_t numberOfInts = sizeToAllocate / sizeof(cl_int); - log_info("Attempting to use %lld bytes of local memory.\n", (cl_ulong)sizeToAllocate); + log_info("Attempting to use %zu bytes of local memory.\n", sizeToAllocate); - localData = (cl_int *)malloc( sizeToAllocate ); - d = init_genrand( gRandomSeed ); - for(i=0; i<(int)(numberOfInts); i++) + localData = (cl_int *)malloc(sizeToAllocate); + d = init_genrand(gRandomSeed); + for (i = 0; i < (int)(numberOfInts); i++) localData[i] = (int)genrand_int32(d); - free_mtdata(d); d = NULL; + free_mtdata(d); + d = NULL; streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate, localData, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); /* Set the arguments */ error = clSetKernelArg(kernel, 0, sizeToAllocate, NULL); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 1, sizeof( streams[0] ), &streams[0]); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg(kernel, 2, sizeof( streams[1] ), &streams[1]); - test_error( error, "Unable to set indexed kernel arguments" ); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = 
clSetKernelArg(kernel, 2, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); /* Test running the kernel and verifying it */ threads[0] = numberOfInts; localThreads[0] = 1; - log_info("Creating local buffer with %d cl_ints (%d bytes).\n", (int)numberOfInts, (int)sizeToAllocate); + log_info("Creating local buffer with %zu cl_ints (%zu bytes).\n", + numberOfInts, sizeToAllocate); cl_event evt; - cl_int evt_err; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &evt ); + cl_int evt_err; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &evt); test_error(error, "clEnqueueNDRangeKernel failed"); error = clFinish(queue); - test_error( error, "clFinish failed"); + test_error(error, "clFinish failed"); - error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof evt_err, &evt_err, NULL); - test_error( error, "clGetEventInfo with maximum local buffer size failed."); + error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof evt_err, &evt_err, NULL); + test_error(error, "clGetEventInfo with maximum local buffer size failed."); - if (evt_err != CL_COMPLETE) { + if (evt_err != CL_COMPLETE) + { print_error(evt_err, "Kernel event returned error"); clReleaseEvent(evt); return -1; @@ -1775,95 +2163,118 @@ int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_co resultData = (cl_int *)malloc(sizeToAllocate); - error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, resultData, 0, NULL, NULL); - test_error( error, "clEnqueueReadBuffer failed"); + error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, + resultData, 0, NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); - for(i=0; i<(int)(numberOfInts); i++) - if (localData[i] != resultData[i]) { + for (i = 0; i < (int)(numberOfInts); i++) + if (localData[i] != resultData[i]) + { clReleaseEvent(evt); - free( 
localData ); + free(localData); free(resultData); log_error("Results failed to verify.\n"); return -1; } clReleaseEvent(evt); - free( localData ); + free(localData); free(resultData); return err; } -int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_kernel_preferred_work_group_size_multiple( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements) { - int err; + int err; clProgramWrapper program; clKernelWrapper kernel; size_t max_local_workgroup_size[3]; size_t max_workgroup_size = 0, preferred_workgroup_size = 0; - err = create_single_kernel_helper(context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ); + err = create_single_kernel_helper(context, &program, &kernel, 1, + sample_local_arg_kernel, "sample_test"); test_error(err, "Failed to build kernel/program."); err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, - sizeof(max_workgroup_size), &max_workgroup_size, NULL); + sizeof(max_workgroup_size), + &max_workgroup_size, NULL); test_error(err, "clGetKernelWorkgroupInfo failed."); - err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, - sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL); + err = clGetKernelWorkGroupInfo( + kernel, deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, + sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL); test_error(err, "clGetKernelWorkgroupInfo failed."); - err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL); + err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(max_local_workgroup_size), + max_local_workgroup_size, NULL); test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); - // Since the preferred size is only a performance hint, we can only really check 
that we get a sane value - // back - log_info( "size: %ld preferred: %ld max: %ld\n", max_workgroup_size, preferred_workgroup_size, max_local_workgroup_size[0] ); + // Since the preferred size is only a performance hint, we can only really + // check that we get a sane value back + log_info("size: %ld preferred: %ld max: %ld\n", max_workgroup_size, + preferred_workgroup_size, max_local_workgroup_size[0]); - if( preferred_workgroup_size > max_workgroup_size ) + if (preferred_workgroup_size > max_workgroup_size) { - log_error( "ERROR: Reported preferred workgroup multiple larger than max workgroup size (preferred %ld, max %ld)\n", preferred_workgroup_size, max_workgroup_size ); + log_error("ERROR: Reported preferred workgroup multiple larger than " + "max workgroup size (preferred %ld, max %ld)\n", + preferred_workgroup_size, max_workgroup_size); return -1; } return 0; } -int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_execution_capabilities(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { int error; cl_device_exec_capabilities value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get execution capabilities" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_EXECUTION_CAPABILITIES, + sizeof(value), &value, 0); + test_error(error, "Unable to get execution capabilities"); - if( ( value & CL_EXEC_KERNEL ) != CL_EXEC_KERNEL ) + if ((value & CL_EXEC_KERNEL) != CL_EXEC_KERNEL) { - log_error( "ERROR: Reported execution capabilities less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported execution capabilities less than required " + "by OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } return 0; } -int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int 
num_elements) +int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int error; cl_command_queue_properties value; - error = clGetDeviceInfo( deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( value ), &value, 0 ); - test_error( error, "Unable to get queue properties" ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, + sizeof(value), &value, 0); + test_error(error, "Unable to get queue properties"); - if( ( value & CL_QUEUE_PROFILING_ENABLE ) != CL_QUEUE_PROFILING_ENABLE ) + if ((value & CL_QUEUE_PROFILING_ENABLE) != CL_QUEUE_PROFILING_ENABLE) { - log_error( "ERROR: Reported queue properties less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value ); + log_error("ERROR: Reported queue properties less than required by " + "OpenCL 1.0 (reported 0x%08x)\n", + (int)value); return -1; } return 0; } -int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_device_version(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { // Query for the device version. 
Version device_cl_version = get_device_cl_version(deviceID); @@ -1959,84 +2370,101 @@ int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_co return 0; } -int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_min_max_language_version(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int error; - cl_char buffer[ 4098 ]; + cl_char buffer[4098]; size_t length; // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*" - error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( buffer ), buffer, &length ); - test_error( error, "Unable to get device opencl c version string" ); - if( memcmp( buffer, "OpenCL C ", strlen( "OpenCL C " ) ) != 0 ) - { - log_error( "ERROR: Initial part of device language version string does not match required format! (returned: \"%s\")\n", (char *)buffer ); + error = clGetDeviceInfo(deviceID, CL_DEVICE_OPENCL_C_VERSION, + sizeof(buffer), buffer, &length); + test_error(error, "Unable to get device opencl c version string"); + if (memcmp(buffer, "OpenCL C ", strlen("OpenCL C ")) != 0) + { + log_error("ERROR: Initial part of device language version string does " + "not match required format! (returned: \"%s\")\n", + (char *)buffer); return -1; } log_info("Returned version \"%s\".\n", buffer); - char *p1 = (char *)buffer + strlen( "OpenCL C " ); - while( *p1 == ' ' ) - p1++; + char *p1 = (char *)buffer + strlen("OpenCL C "); + while (*p1 == ' ') p1++; char *p2 = p1; - if( ! isdigit(*p2) ) + if (!isdigit(*p2)) { - log_error( "ERROR: Major revision number must follow space behind OpenCL C! (returned %s)\n", (char*) buffer ); + log_error("ERROR: Major revision number must follow space behind " + "OpenCL C! (returned %s)\n", + (char *)buffer); return -1; } - while( isdigit( *p2 ) ) - p2++; - if( *p2 != '.' 
) + while (isdigit(*p2)) p2++; + if (*p2 != '.') { - log_error( "ERROR: Version number must contain a decimal point! (returned: %s)\n", (char *)buffer ); + log_error("ERROR: Version number must contain a decimal point! " + "(returned: %s)\n", + (char *)buffer); return -1; } char *p3 = p2 + 1; - if( ! isdigit(*p3) ) + if (!isdigit(*p3)) { - log_error( "ERROR: Minor revision number is missing or does not abut the decimal point! (returned %s)\n", (char*) buffer ); + log_error("ERROR: Minor revision number is missing or does not abut " + "the decimal point! (returned %s)\n", + (char *)buffer); return -1; } - while( isdigit( *p3 ) ) - p3++; - if( *p3 != ' ' ) + while (isdigit(*p3)) p3++; + if (*p3 != ' ') { - log_error( "ERROR: A space must appear after the minor version! (returned: %s)\n", (char *)buffer ); + log_error("ERROR: A space must appear after the minor version! " + "(returned: %s)\n", + (char *)buffer); return -1; } *p2 = ' '; // Put in a space for atoi below. p2++; - int major = atoi( p1 ); - int minor = atoi( p2 ); + int major = atoi(p1); + int minor = atoi(p2); int minor_revision = 2; - if( major * 10 + minor < 10 + minor_revision ) + if (major * 10 + minor < 10 + minor_revision) { - // If the language version did not match, check to see if OPENCL_1_0_DEVICE is set. - if( getenv("OPENCL_1_0_DEVICE")) + // If the language version did not match, check to see if + // OPENCL_1_0_DEVICE is set. + if (getenv("OPENCL_1_0_DEVICE")) { - log_info( "WARNING: This test was run with OPENCL_1_0_DEVICE defined! This is not a OpenCL 1.1 or OpenCL 1.2 compatible device!!!\n" ); + log_info("WARNING: This test was run with OPENCL_1_0_DEVICE " + "defined! This is not a OpenCL 1.1 or OpenCL 1.2 " + "compatible device!!!\n"); } - else if( getenv("OPENCL_1_1_DEVICE")) + else if (getenv("OPENCL_1_1_DEVICE")) { - log_info( "WARNING: This test was run with OPENCL_1_1_DEVICE defined! 
This is not a OpenCL 1.2 compatible device!!!\n" ); + log_info( + "WARNING: This test was run with OPENCL_1_1_DEVICE defined! " + "This is not a OpenCL 1.2 compatible device!!!\n"); } else { - log_error( "ERROR: OpenCL device language version returned is less than 1.%d! (Returned: %s)\n", minor_revision, (char *)buffer ); - return -1; + log_error("ERROR: OpenCL device language version returned is less " + "than 1.%d! (Returned: %s)\n", + minor_revision, (char *)buffer); + return -1; } } // Sanity checks on the returned values - if( length != (strlen( (char *)buffer ) + 1 )) + if (length != (strlen((char *)buffer) + 1)) { - log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer ), (int)length ); + log_error("ERROR: Returned length of version string does not match " + "actual length (actual: %d, returned: %d)\n", + (int)strlen((char *)buffer), (int)length); return -1; } return 0; } - -- cgit v1.2.3 From 8ffecf27c28d28296180cde282e5665bc2cb2c00 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Wed, 8 Dec 2021 17:07:42 +0100 Subject: Fix build, glext should not be used with GLEW (#1337) * Fix build, glext should not be used with GLEW * Remove additional define GL_GLEXT_PROTOTYPES * Remove includes which already defined in setup.h --- test_common/gl/setup_win32.cpp | 3 --- test_common/gl/setup_x11.cpp | 5 ----- 2 files changed, 8 deletions(-) diff --git a/test_common/gl/setup_win32.cpp b/test_common/gl/setup_win32.cpp index b120a36d..708e681d 100644 --- a/test_common/gl/setup_win32.cpp +++ b/test_common/gl/setup_win32.cpp @@ -13,14 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
// -#define GL_GLEXT_PROTOTYPES #include "setup.h" #include "testBase.h" #include "harness/errorHelpers.h" -#include -#include #include typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)( diff --git a/test_common/gl/setup_x11.cpp b/test_common/gl/setup_x11.cpp index 7efda3d2..abc065c9 100644 --- a/test_common/gl/setup_x11.cpp +++ b/test_common/gl/setup_x11.cpp @@ -13,16 +13,11 @@ // See the License for the specific language governing permissions and // limitations under the License. // -#define GL_GLEXT_PROTOTYPES #include "setup.h" #include "testBase.h" #include "harness/errorHelpers.h" -#include -#include -#include -#include #include #include -- cgit v1.2.3 From 73d71b6a76ce9697c5224a0933157355302d5002 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Wed, 8 Dec 2021 16:08:15 +0000 Subject: Add cl_khr_command_buffer to list of extensions (#1365) cl_khr_command_buffer is now public as a provisional khr extension which implementations may report. --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 2f29d39b..1519779a 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -78,6 +78,7 @@ const char *known_extensions[] = { "cl_khr_semaphore", "cl_khr_external_semaphore", "cl_khr_external_semaphore_sync_fd", + "cl_khr_command_buffer", }; size_t num_known_extensions = sizeof(known_extensions) / sizeof(char *); -- cgit v1.2.3 From 1161d788dd5d71885ca19783210f18c305715a7f Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 14 Dec 2021 17:52:44 +0000 Subject: Refactor logging of subgroup test start/pass messages (#1361) Note that this also corrects the start messages logged for the sub_group_ballot_bit_count/find_msb/find_lsb tests. 
Signed-off-by: Stuart Brady --- .../subgroups/subgroup_common_templates.h | 101 +++++++++++---------- test_conformance/subgroups/subhelpers.h | 50 +++++++--- test_conformance/subgroups/test_barrier.cpp | 16 +++- test_conformance/subgroups/test_ifp.cpp | 8 +- test_conformance/subgroups/test_subgroup.cpp | 9 +- .../subgroups/test_subgroup_ballot.cpp | 48 +++++++--- .../subgroups/test_subgroup_clustered_reduce.cpp | 14 +-- .../subgroups/test_subgroup_non_uniform_vote.cpp | 16 ++-- 8 files changed, 164 insertions(+), 98 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 64b4b971..fc0b03b5 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -63,6 +63,13 @@ static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id, // only 4 work_items from subgroup enter the code (are active) template struct BC { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int i, ii, j, k, n; @@ -76,8 +83,6 @@ template struct BC int last_subgroup_size = 0; ii = 0; - log_info(" sub_group_%s(%s)...\n", operation_names(operation), - TypeManager::name()); if (non_uniform_size) { ng++; @@ -286,8 +291,6 @@ template struct BC y += nw; m += 4 * nw; } - log_info(" sub_group_%s(%s)... 
passed\n", operation_names(operation), - TypeManager::name()); return TEST_PASS; } }; @@ -437,6 +440,13 @@ void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) template struct SHF { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int i, ii, j, k, l, n, delta; @@ -447,8 +457,6 @@ template struct SHF int d = ns > 100 ? 100 : ns; ii = 0; ng = ng / nw; - log_info(" sub_group_%s(%s)...\n", operation_names(operation), - TypeManager::name()); for (k = 0; k < ng; ++k) { // for each work_group for (j = 0; j < nj; ++j) @@ -560,26 +568,29 @@ template struct SHF y += nw; m += 4 * nw; } - log_info(" sub_group_%s(%s)... passed\n", operation_names(operation), - TypeManager::name()); return TEST_PASS; } }; template struct SCEX_NU { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_scan_exclusive" + : "sub_group_scan_exclusive"); + log_info(" %s_%s(%s)...%s\n", func_name.c_str(), + operation_names(operation), TypeManager::name(), + extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - std::string func_name; - test_params.work_items_mask.any() - ? func_name = "sub_group_non_uniform_scan_exclusive" - : func_name = "sub_group_scan_exclusive"; - log_info(" %s_%s(%s)...\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); genrand(x, t, m, ns, nw, ng); } @@ -595,11 +606,9 @@ template struct SCEX_NU Ty tr, rr; ng = ng / nw; - std::string func_name; - test_params.work_items_mask.any() - ? 
func_name = "sub_group_non_uniform_scan_exclusive" - : func_name = "sub_group_scan_exclusive"; - + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_scan_exclusive" + : "sub_group_scan_exclusive"); // for uniform case take into consideration all workitems if (!work_items_mask.any()) @@ -656,8 +665,6 @@ template struct SCEX_NU m += 4 * nw; } - log_info(" %s_%s(%s)... passed\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); return TEST_PASS; } }; @@ -665,20 +672,24 @@ template struct SCEX_NU // Test for scan inclusive non uniform functions template struct SCIN_NU { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_scan_inclusive" + : "sub_group_scan_inclusive"); + log_info(" %s_%s(%s)...%s\n", func_name.c_str(), + operation_names(operation), TypeManager::name(), + extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - std::string func_name; - test_params.work_items_mask.any() - ? func_name = "sub_group_non_uniform_scan_inclusive" - : func_name = "sub_group_scan_inclusive"; - genrand(x, t, m, ns, nw, ng); - log_info(" %s_%s(%s)...\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -694,10 +705,9 @@ template struct SCIN_NU Ty tr, rr; ng = ng / nw; - std::string func_name; - work_items_mask.any() - ? func_name = "sub_group_non_uniform_scan_inclusive" - : func_name = "sub_group_scan_inclusive"; + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? 
"sub_group_non_uniform_scan_inclusive" + : "sub_group_scan_inclusive"); // for uniform case take into consideration all workitems if (!work_items_mask.any()) @@ -771,8 +781,6 @@ template struct SCIN_NU m += 4 * nw; } - log_info(" %s_%s(%s)... passed\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); return TEST_PASS; } }; @@ -780,6 +788,16 @@ template struct SCIN_NU // Test for reduce non uniform functions template struct RED_NU { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_reduce" + : "sub_group_reduce"); + log_info(" %s_%s(%s)...%s\n", func_name.c_str(), + operation_names(operation), TypeManager::name(), + extra_text); + } static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { @@ -787,13 +805,6 @@ template struct RED_NU int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - std::string func_name; - - test_params.work_items_mask.any() - ? func_name = "sub_group_non_uniform_reduce" - : func_name = "sub_group_reduce"; - log_info(" %s_%s(%s)...\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); genrand(x, t, m, ns, nw, ng); } @@ -809,9 +820,9 @@ template struct RED_NU ng = ng / nw; Ty tr, rr; - std::string func_name; - work_items_mask.any() ? func_name = "sub_group_non_uniform_reduce" - : func_name = "sub_group_reduce"; + std::string func_name = (test_params.all_work_item_masks.size() > 0 + ? "sub_group_non_uniform_reduce" + : "sub_group_reduce"); for (k = 0; k < ng; ++k) { @@ -875,8 +886,6 @@ template struct RED_NU m += 4 * nw; } - log_info(" %s_%s(%s)... 
passed\n", func_name.c_str(), - operation_names(operation), TypeManager::name()); return TEST_PASS; } }; diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index bd4b6d61..30105a57 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -1380,23 +1380,45 @@ template struct test const char *kname, const char *src, WorkGroupParams test_params) { + Fns::log_test(test_params, ""); + test_status combined_error = TEST_SKIPPED_ITSELF; for (auto &mask : test_params.all_work_item_masks) { test_params.work_items_mask = mask; - test_status error = run(device, context, queue, num_elements, kname, - src, test_params); + test_status error = do_run(device, context, queue, num_elements, + kname, src, test_params); if (error == TEST_FAIL || (error == TEST_PASS && combined_error != TEST_FAIL)) combined_error = error; } + + if (combined_error == TEST_PASS) + { + Fns::log_test(test_params, " passed"); + } return combined_error; }; - static test_status run(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - const char *kname, const char *src, - WorkGroupParams test_params) + static int run(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, const char *kname, + const char *src, WorkGroupParams test_params) + { + Fns::log_test(test_params, ""); + + int error = do_run(device, context, queue, num_elements, kname, src, + test_params); + + if (error == TEST_PASS) + { + Fns::log_test(test_params, " passed"); + } + return error; + }; + static test_status do_run(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + const char *kname, const char *src, + WorkGroupParams test_params) { size_t tmp; cl_int error; @@ -1442,16 +1464,14 @@ template struct test log_info("Data type not supported : %s\n", TypeManager::name()); return TEST_SKIPPED_ITSELF; } - else + + if (strstr(TypeManager::name(), "double")) + { + 
kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"; + } + else if (strstr(TypeManager::name(), "half")) { - if (strstr(TypeManager::name(), "double")) - { - kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n"; - } - else if (strstr(TypeManager::name(), "half")) - { - kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp16: enable\n"; - } + kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_fp16: enable\n"; } error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp index b570e922..d415eefb 100644 --- a/test_conformance/subgroups/test_barrier.cpp +++ b/test_conformance/subgroups/test_barrier.cpp @@ -59,6 +59,17 @@ static const char *gbar_source = // barrier test functions template struct BAR { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + if (Which == 0) + log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...%s\n", + extra_text); + else + log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...%s\n", + extra_text); + } + static void gen(cl_int *x, cl_int *t, cl_int *m, const WorkGroupParams &test_params) { @@ -103,11 +114,6 @@ template struct BAR ng = ng / nw; cl_int tr, rr; - if (Which == 0) - log_info(" sub_group_barrier(CLK_LOCAL_MEM_FENCE)...\n"); - else - log_info(" sub_group_barrier(CLK_GLOBAL_MEM_FENCE)...\n"); - for (k = 0; k < ng; ++k) { // Map to array indexed to array indexed by local ID and sub group diff --git a/test_conformance/subgroups/test_ifp.cpp b/test_conformance/subgroups/test_ifp.cpp index f6c5227d..f2bd5b92 100644 --- a/test_conformance/subgroups/test_ifp.cpp +++ b/test_conformance/subgroups/test_ifp.cpp @@ -225,6 +225,12 @@ void run_insts(cl_int *x, cl_int *p, int n) struct IFP { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" independent forward progress...%s\n", extra_text); + } + static void gen(cl_int *x, cl_int 
*t, cl_int *, const WorkGroupParams &test_params) { @@ -258,8 +264,6 @@ struct IFP // We need at least 2 sub groups per group for this test if (nj == 1) return TEST_SKIPPED_ITSELF; - log_info(" independent forward progress...\n"); - for (k = 0; k < ng; ++k) { run_insts(x, t, nj); diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp index eefca5f8..aa9b32cb 100644 --- a/test_conformance/subgroups/test_subgroup.cpp +++ b/test_conformance/subgroups/test_subgroup.cpp @@ -24,6 +24,13 @@ namespace { // Any/All test functions template struct AA { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s...%s\n", operation_names(operation), + extra_text); + } + static void gen(cl_int *x, cl_int *t, cl_int *m, const WorkGroupParams &test_params) { @@ -35,7 +42,6 @@ template struct AA int e; ng = ng / nw; ii = 0; - log_info(" sub_group_%s...\n", operation_names(operation)); for (k = 0; k < ng; ++k) { for (j = 0; j < nj; ++j) @@ -124,7 +130,6 @@ template struct AA y += nw; m += 4 * nw; } - log_info(" sub_group_%s... 
passed\n", operation_names(operation)); return TEST_PASS; } }; diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index e742aa3b..837988ea 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -23,6 +23,12 @@ namespace { // Test for ballot functions template struct BALLOT { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_ballot...%s\n", extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { // no work here @@ -30,7 +36,6 @@ template struct BALLOT int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; int non_uniform_size = gws % lws; - log_info(" sub_group_ballot...\n"); } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -92,7 +97,6 @@ template struct BALLOT y += lws; m += 4 * lws; } - log_info(" sub_group_ballot... passed\n"); return TEST_PASS; } }; @@ -100,6 +104,13 @@ template struct BALLOT // Test for bit extract ballot functions template struct BALLOT_BIT_EXTRACT { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_ballot_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int wi_id, sb_id, wg_id, l; @@ -110,8 +121,6 @@ template struct BALLOT_BIT_EXTRACT int wg_number = gws / lws; int limit_sbs = sbs > 100 ? 100 : sbs; int non_uniform_size = gws % lws; - log_info(" sub_group_%s(%s)...\n", operation_names(operation), - TypeManager::name()); for (wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group @@ -251,21 +260,24 @@ template struct BALLOT_BIT_EXTRACT y += lws; m += 4 * lws; } - log_info(" sub_group_%s(%s)... 
passed\n", operation_names(operation), - TypeManager::name()); return TEST_PASS; } }; template struct BALLOT_INVERSE { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_inverse_ballot...%s\n", extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; int non_uniform_size = gws % lws; - log_info(" sub_group_inverse_ballot...\n"); // no work here } @@ -341,7 +353,6 @@ template struct BALLOT_INVERSE m += 4 * lws; } - log_info(" sub_group_inverse_ballot... passed\n"); return TEST_PASS; } }; @@ -350,6 +361,13 @@ template struct BALLOT_INVERSE // Test for bit count/inclusive and exclusive scan/ find lsb msb ballot function template struct BALLOT_COUNT_SCAN_FIND { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s(%s)...%s\n", operation_names(operation), + TypeManager::name(), extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int wi_id, wg_id, sb_id; @@ -362,8 +380,6 @@ template struct BALLOT_COUNT_SCAN_FIND int last_subgroup_size = 0; int current_sbs = 0; - log_info(" sub_group_%s(%s)...\n", operation_names(operation), - TypeManager::name()); if (non_uniform_size) { wg_number++; @@ -562,8 +578,6 @@ template struct BALLOT_COUNT_SCAN_FIND y += lws; m += 4 * lws; } - log_info(" sub_group_ballot_%s(%s)... 
passed\n", - operation_names(operation), TypeManager::name()); return TEST_PASS; } }; @@ -571,6 +585,13 @@ template struct BALLOT_COUNT_SCAN_FIND // test mask functions template struct SMASK { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" get_sub_group_%s_mask...%s\n", operation_names(operation), + extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int wi_id, wg_id, l, sb_id; @@ -579,7 +600,6 @@ template struct SMASK int sbs = test_params.subgroup_size; int sb_number = (lws + sbs - 1) / sbs; int wg_number = gws / lws; - log_info(" get_sub_group_%s_mask...\n", operation_names(operation)); for (wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group for (sb_id = 0; sb_id < sb_number; ++sb_id) @@ -655,8 +675,6 @@ template struct SMASK y += lws; m += 4 * lws; } - log_info(" get_sub_group_%s_mask... passed\n", - operation_names(operation)); return TEST_PASS; } }; diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index ad9e1ff2..f5872006 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -38,15 +38,20 @@ __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type // Test for reduce cluster functions template struct RED_CLU { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ...%s\n", + operation_names(operation), TypeManager::name(), + sizeof(Ty), extra_text); + } + static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ...\n", - 
operation_names(operation), TypeManager::name(), - sizeof(Ty)); genrand(x, t, m, ns, nw, ng); } @@ -124,9 +129,6 @@ template struct RED_CLU y += nw; m += 4 * nw; } - log_info(" sub_group_clustered_reduce_%s(%s, %d bytes) ... passed\n", - operation_names(operation), TypeManager::name(), - sizeof(Ty)); return TEST_PASS; } }; diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index b21a9f7e..3f0985e2 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -22,6 +22,15 @@ namespace { template struct VOTE { + static void log_test(const WorkGroupParams &test_params, + const char *extra_text) + { + log_info(" sub_group_%s%s(%s)...%s\n", + (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_", + operation_names(operation), TypeManager::name(), + extra_text); + } + static void gen(T *x, T *t, cl_int *m, const WorkGroupParams &test_params) { int i, ii, j, k, n; @@ -34,10 +43,6 @@ template struct VOTE int last_subgroup_size = 0; ii = 0; - log_info(" sub_group_%s%s(%s)... \n", - (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_", - operation_names(operation), TypeManager::name()); - if (operation == NonUniformVoteOp::elect) return; for (k = 0; k < ng; ++k) @@ -192,9 +197,6 @@ template struct VOTE m += 4 * nw; } - log_info(" sub_group_%s%s(%s)... passed\n", - (operation == NonUniformVoteOp::elect) ? "" : "non_uniform_", - operation_names(operation), TypeManager::name()); return TEST_PASS; } }; -- cgit v1.2.3 From c2facedfa0a0e07f7602cfecae90392419c0e159 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Wed, 5 Jan 2022 08:43:50 -0700 Subject: Remove dead threading code (#1339) Remove unused code that hasn't been used for the last three years and isn't included in makefiles. 
Co-authored-by: oramirez --- test_common/harness/threadTesting.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test_common/harness/threadTesting.cpp diff --git a/test_common/harness/threadTesting.cpp b/test_common/harness/threadTesting.cpp deleted file mode 100644 index e69de29b..00000000 -- cgit v1.2.3 From b71c2047943a44a2e99c367e406e680caa160bfe Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Wed, 5 Jan 2022 17:08:52 +0100 Subject: test_subgroups - Set safe input values for half type and mul, add operations (#1346) * Set safe input values for half type and mul, add operations * Set safe values for all data types * Typo fix * Set constant seed for shuffle * Change function name to more specific * set_value takes an integer value, not a bit pattern --- .../subgroups/subgroup_common_templates.h | 48 ++++++++++++++++++---- .../subgroups/test_subgroup_clustered_reduce.cpp | 2 +- 2 files changed, 41 insertions(+), 9 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index fc0b03b5..641c1875 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -20,6 +20,8 @@ #include "CL/cl_half.h" #include "subhelpers.h" #include +#include +#include static cl_uint4 generate_bit_mask(cl_uint subgroup_local_id, const std::string &mask_type, @@ -391,11 +393,44 @@ template bool is_floating_point() || std::is_same::value; } +// limit possible input values to avoid arithmetic rounding/overflow issues. 
+// for each subgroup values defined different values +// for rest of workitems set 1 +// shuffle values +static void fill_and_shuffle_safe_values(std::vector &safe_values, + int sb_size) +{ + // max product is 720, cl_half has enough precision for it + const std::vector non_one_values{ 2, 3, 4, 5, 6 }; + + if (sb_size <= non_one_values.size()) + { + safe_values.assign(non_one_values.begin(), + non_one_values.begin() + sb_size); + } + else + { + safe_values.assign(sb_size, 1); + std::copy(non_one_values.begin(), non_one_values.end(), + safe_values.begin()); + } + + std::mt19937 mersenne_twister_engine(10000); + std::shuffle(safe_values.begin(), safe_values.end(), + mersenne_twister_engine); +}; + template -void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) +void generate_inputs(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) { int nj = (nw + ns - 1) / ns; + std::vector safe_values; + if (operation == ArithmeticOp::mul_ || operation == ArithmeticOp::add_) + { + fill_and_shuffle_safe_values(safe_values, ns); + } + for (int k = 0; k < ng; ++k) { for (int j = 0; j < nj; ++j) @@ -406,13 +441,10 @@ void genrand(Ty *x, Ty *t, cl_int *m, int ns, int nw, int ng) for (int i = 0; i < n; ++i) { cl_ulong out_value; - double y; if (operation == ArithmeticOp::mul_ || operation == ArithmeticOp::add_) { - // work around to avoid overflow, do not use 0 for - // multiplication - out_value = (genrand_int32(gMTdata) % 4) + 1; + out_value = safe_values[i]; } else { @@ -591,7 +623,7 @@ template struct SCEX_NU int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - genrand(x, t, m, ns, nw, ng); + generate_inputs(x, t, m, ns, nw, ng); } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -689,7 +721,7 @@ template struct SCIN_NU int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - genrand(x, t, m, ns, nw, ng); + generate_inputs(x, t, m, ns, nw, ng); } static test_status chk(Ty *x, 
Ty *y, Ty *mx, Ty *my, cl_int *m, @@ -805,7 +837,7 @@ template struct RED_NU int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - genrand(x, t, m, ns, nw, ng); + generate_inputs(x, t, m, ns, nw, ng); } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index f5872006..527be5ad 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -52,7 +52,7 @@ template struct RED_CLU int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; ng = ng / nw; - genrand(x, t, m, ns, nw, ng); + generate_inputs(x, t, m, ns, nw, ng); } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, -- cgit v1.2.3 From f91daf3d062d7d085bd9e9154869d2179655685f Mon Sep 17 00:00:00 2001 From: Jim Lewis Date: Thu, 6 Jan 2022 04:23:07 -0600 Subject: Remove invalid negative_get_platform_info testcase (#1374) * Remove invalid negative_get_platform_info testcase * Implementations are only required to do null checks * Fixes #1318 * Fix formatting --- test_conformance/api/negative_platform.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/test_conformance/api/negative_platform.cpp b/test_conformance/api/negative_platform.cpp index 7d9de5df..861d4748 100644 --- a/test_conformance/api/negative_platform.cpp +++ b/test_conformance/api/negative_platform.cpp @@ -42,18 +42,9 @@ int test_negative_get_platform_info(cl_device_id deviceID, cl_context context, { cl_platform_id platform = getPlatformFromDevice(deviceID); - cl_int err = - clGetPlatformInfo(reinterpret_cast(deviceID), - CL_PLATFORM_VERSION, sizeof(char*), nullptr, nullptr); - test_failure_error_ret( - err, CL_INVALID_PLATFORM, - "clGetPlatformInfo should return CL_INVALID_PLATFORM when: \"platform " - "is not a 
valid platform\" using a valid object which is NOT a " - "platform", - TEST_FAIL); - constexpr cl_platform_info INVALID_PARAM_VALUE = 0; - err = clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr); + cl_int err = + clGetPlatformInfo(platform, INVALID_PARAM_VALUE, 0, nullptr, nullptr); test_failure_error_ret( err, CL_INVALID_VALUE, "clGetPlatformInfo should return CL_INVALID_VALUE when: \"param_name " -- cgit v1.2.3 From 51c6d97d2f9d62e5bdcbc1f4cbec2d5be2bedf0a Mon Sep 17 00:00:00 2001 From: Jim Lewis Date: Thu, 6 Jan 2022 04:26:20 -0600 Subject: Fix test_api get_command_queue_info (#1324) * Fix test_api get_command_queue_info Decouple host and device out-of-order test enabling * Rename property sets more generically * Refactor to use std::vector to accumulate test permutations --- test_conformance/api/test_queries.cpp | 127 +++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 57 deletions(-) diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp index 469a1934..30b5706f 100644 --- a/test_conformance/api/test_queries.cpp +++ b/test_conformance/api/test_queries.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -19,6 +19,7 @@ #include #include #include +#include int test_get_platform_info(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { @@ -345,87 +346,100 @@ int command_queue_param_test(cl_command_queue queue, return 0; } -#define MIN_NUM_COMMAND_QUEUE_PROPERTIES 2 -#define OOO_NUM_COMMAND_QUEUE_PROPERTIES 4 -static cl_command_queue_properties property_options[] = { - 0, - - CL_QUEUE_PROFILING_ENABLE, - - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - - CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - - CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - - CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE - | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - - CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT - | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, - - CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT - | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE -}; - int check_get_command_queue_info_params(cl_device_id deviceID, cl_context context, bool is_compatibility) { - int error; - size_t size; + const cl_command_queue_properties host_optional[] = { + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE + }; + + const cl_command_queue_properties device_required[] = { + CL_QUEUE_ON_DEVICE | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE + | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT + | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + CL_QUEUE_PROFILING_ENABLE | CL_QUEUE_ON_DEVICE + | CL_QUEUE_ON_DEVICE_DEFAULT + | CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE + }; + + const size_t host_optional_size = ARRAY_SIZE(host_optional); + const size_t device_required_size = ARRAY_SIZE(device_required); + + Version version = get_device_cl_version(deviceID); - cl_queue_properties host_queue_props, device_queue_props; - cl_queue_properties 
queue_props[] = { CL_QUEUE_PROPERTIES, 0, 0 }; + const cl_device_info host_queue_query = version >= Version(2, 0) + ? CL_DEVICE_QUEUE_ON_HOST_PROPERTIES + : CL_DEVICE_QUEUE_PROPERTIES; - clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, - sizeof(host_queue_props), &host_queue_props, NULL); - log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", - (int)host_queue_props); - clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, - sizeof(device_queue_props), &device_queue_props, NULL); - log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", - (int)device_queue_props); + cl_queue_properties host_queue_props = 0; + int error = + clGetDeviceInfo(deviceID, host_queue_query, sizeof(host_queue_props), + &host_queue_props, NULL); + test_error(error, "clGetDeviceInfo failed"); + log_info("CL_DEVICE_QUEUE_ON_HOST_PROPERTIES is %d\n", host_queue_props); - auto version = get_device_cl_version(deviceID); + cl_queue_properties device_queue_props = 0; + if (version >= Version(2, 0)) + { + error = clGetDeviceInfo(deviceID, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, + sizeof(device_queue_props), &device_queue_props, + NULL); + test_error(error, "clGetDeviceInfo failed"); + log_info("CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES is %d\n", + device_queue_props); + } + + bool out_of_order_supported = + host_queue_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; - // Are on device queues supported bool on_device_supported = (version >= Version(2, 0) && version < Version(3, 0)) || (version >= Version(3, 0) && device_queue_props != 0); - int num_test_options = MIN_NUM_COMMAND_QUEUE_PROPERTIES; - if (host_queue_props & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) + // test device queues if the device and the API under test support it + bool test_on_device = on_device_supported && !is_compatibility; + + std::vector queue_props{ 0, + CL_QUEUE_PROFILING_ENABLE }; + + if (out_of_order_supported) { - // Test out-of-order queues properties if supported - num_test_options = 
OOO_NUM_COMMAND_QUEUE_PROPERTIES; - } - if (on_device_supported && !is_compatibility) + queue_props.insert(queue_props.end(), &host_optional[0], + &host_optional[host_optional_size]); + }; + + cl_queue_properties queue_props_arg[] = { CL_QUEUE_PROPERTIES, 0, 0 }; + + if (test_on_device) { - // Test queue on device if supported (in this case out-of-order must - // also be supported) - num_test_options = ARRAY_SIZE(property_options); - } + queue_props.insert(queue_props.end(), &device_required[0], + &device_required[device_required_size]); + }; - for (int i = 0; i < num_test_options; i++) + for (cl_queue_properties props : queue_props) { - queue_props[1] = property_options[i]; - clCommandQueueWrapper queue; + queue_props_arg[1] = props; + + clCommandQueueWrapper queue; if (is_compatibility) { - queue = - clCreateCommandQueue(context, deviceID, queue_props[1], &error); + queue = clCreateCommandQueue(context, deviceID, props, &error); test_error(error, "Unable to create command queue to test with"); } else { queue = clCreateCommandQueueWithProperties(context, deviceID, - &queue_props[0], &error); + queue_props_arg, &error); test_error(error, "Unable to create command queue to test with"); } cl_uint refCount; + size_t size; error = clGetCommandQueueInfo(queue, CL_QUEUE_REFERENCE_COUNT, sizeof(refCount), &refCount, &size); test_error(error, "Unable to get command queue reference count"); @@ -442,11 +456,12 @@ int check_get_command_queue_info_params(cl_device_id deviceID, test_error(error, "param checking failed"); error = command_queue_param_test(queue, CL_QUEUE_PROPERTIES, - queue_props[1], "properties"); + queue_props_arg[1], "properties"); test_error(error, "param checking failed"); } return 0; } + int test_get_command_queue_info(cl_device_id deviceID, cl_context context, cl_command_queue ignoreQueue, int num_elements) { @@ -824,5 +839,3 @@ int test_kernel_required_group_size(cl_device_id deviceID, cl_context context, c return 0; } - - -- cgit v1.2.3 From 
06415f8b79c38bb08279c8267d38b41101f32760 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 11 Jan 2022 09:52:11 -0700 Subject: Fix memory leaks (#1378) * Fix memory leaks Fixed memory leaks in: buffers, basic, and vectors * Formatting fixes Co-authored-by: oramirez --- test_conformance/basic/test_vector_swizzle.cpp | 58 ++++++++++++++++---------- test_conformance/buffers/test_buffer_fill.cpp | 4 +- test_conformance/buffers/test_buffer_read.cpp | 4 +- test_conformance/vectors/test_step.cpp | 2 + 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/test_conformance/basic/test_vector_swizzle.cpp b/test_conformance/basic/test_vector_swizzle.cpp index 5ab3ea4f..884bcf36 100644 --- a/test_conformance/basic/test_vector_swizzle.cpp +++ b/test_conformance/basic/test_vector_swizzle.cpp @@ -610,9 +610,6 @@ static int test_vectype(const char* type_name, cl_device_id device, cl_int error = CL_SUCCESS; int result = TEST_PASS; - clProgramWrapper program; - clKernelWrapper kernel; - std::string buildOptions{ "-DTYPE=" }; buildOptions += type_name; buildOptions += std::to_string(N); @@ -628,35 +625,50 @@ static int test_vectype(const char* type_name, cl_device_id device, makeReference(reference); // XYZW swizzles: + { + clProgramWrapper program; + clKernelWrapper kernel; - const char* xyzw_source = TestInfo::kernel_source_xyzw; - error = create_single_kernel_helper( - context, &program, &kernel, 1, &xyzw_source, "test_vector_swizzle_xyzw", - buildOptions.c_str()); - test_error(error, "Unable to create xyzw test kernel"); + const char* xyzw_source = TestInfo::kernel_source_xyzw; + error = create_single_kernel_helper( + context, &program, &kernel, 1, &xyzw_source, + "test_vector_swizzle_xyzw", buildOptions.c_str()); + test_error(error, "Unable to create xyzw test kernel"); - result |= test_vectype_case(value, reference, context, kernel, queue); + result |= test_vectype_case(value, reference, context, kernel, queue); + } // sN swizzles: - const char* 
sN_source = TestInfo::kernel_source_sN; - error = create_single_kernel_helper(context, &program, &kernel, 1, - &sN_source, "test_vector_swizzle_sN", - buildOptions.c_str()); - test_error(error, "Unable to create sN test kernel"); + { + clProgramWrapper program; + clKernelWrapper kernel; + + const char* sN_source = TestInfo::kernel_source_sN; + error = create_single_kernel_helper( + context, &program, &kernel, 1, &sN_source, "test_vector_swizzle_sN", + buildOptions.c_str()); + test_error(error, "Unable to create sN test kernel"); - result |= test_vectype_case(value, reference, context, kernel, queue); + result |= test_vectype_case(value, reference, context, kernel, queue); + } // RGBA swizzles for OpenCL 3.0 and newer: - const Version device_version = get_device_cl_version(device); - if (device_version >= Version(3, 0)) { - const char* rgba_source = TestInfo::kernel_source_rgba; - error = create_single_kernel_helper( - context, &program, &kernel, 1, &rgba_source, - "test_vector_swizzle_rgba", buildOptions.c_str()); - test_error(error, "Unable to create rgba test kernel"); + clProgramWrapper program; + clKernelWrapper kernel; - result |= test_vectype_case(value, reference, context, kernel, queue); + const Version device_version = get_device_cl_version(device); + if (device_version >= Version(3, 0)) + { + const char* rgba_source = TestInfo::kernel_source_rgba; + error = create_single_kernel_helper( + context, &program, &kernel, 1, &rgba_source, + "test_vector_swizzle_rgba", buildOptions.c_str()); + test_error(error, "Unable to create rgba test kernel"); + + result |= + test_vectype_case(value, reference, context, kernel, queue); + } } return result; diff --git a/test_conformance/buffers/test_buffer_fill.cpp b/test_conformance/buffers/test_buffer_fill.cpp index 9c9c7d17..92079794 100644 --- a/test_conformance/buffers/test_buffer_fill.cpp +++ b/test_conformance/buffers/test_buffer_fill.cpp @@ -703,8 +703,6 @@ int test_buffer_fill( cl_device_id deviceID, cl_context 
context, cl_command_queu int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) { TestStruct pattern; - clProgramWrapper program; - clKernelWrapper kernel; size_t ptrSize = sizeof( TestStruct ); size_t global_work_size[3]; int n, err; @@ -720,6 +718,8 @@ int test_buffer_fill_struct( cl_device_id deviceID, cl_context context, cl_comma for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { + clProgramWrapper program; + clKernelWrapper kernel; log_info("Testing with cl_mem_flags: %s\n", flag_set_names[src_flag_id]); diff --git a/test_conformance/buffers/test_buffer_read.cpp b/test_conformance/buffers/test_buffer_read.cpp index 39cf3297..49a57f92 100644 --- a/test_conformance/buffers/test_buffer_read.cpp +++ b/test_conformance/buffers/test_buffer_read.cpp @@ -763,7 +763,6 @@ int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_comman { clProgramWrapper program[5]; clKernelWrapper kernel[5]; - clEventWrapper event; void *outptr[5]; void *inptr[5]; size_t global_work_size[3]; @@ -805,6 +804,7 @@ int test_buffer_read_async( cl_device_id deviceID, cl_context context, cl_comman for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { clMemWrapper buffer; + clEventWrapper event; outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment); if ( ! 
outptr[i] ){ log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) ); @@ -900,7 +900,6 @@ int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, c { clProgramWrapper program[5]; clKernelWrapper kernel[5]; - clEventWrapper event; void *outptr[5], *inptr[5]; size_t global_work_size[3]; cl_int err; @@ -941,6 +940,7 @@ int test_buffer_read_array_barrier( cl_device_id deviceID, cl_context context, c for (src_flag_id = 0; src_flag_id < NUM_FLAGS; src_flag_id++) { clMemWrapper buffer; + clEventWrapper event; outptr[i] = align_malloc(ptrSizes[i] * num_elements, min_alignment); if ( ! outptr[i] ){ log_error( " unable to allocate %d bytes for outptr\n", (int)(ptrSizes[i] * num_elements) ); diff --git a/test_conformance/vectors/test_step.cpp b/test_conformance/vectors/test_step.cpp index 2f6ad187..089bad2f 100644 --- a/test_conformance/vectors/test_step.cpp +++ b/test_conformance/vectors/test_step.cpp @@ -172,6 +172,8 @@ int test_step_internal(cl_device_id deviceID, cl_context context, destroyClState(pClState); return -1; } + + clStateDestroyProgramAndKernel(pClState); } } -- cgit v1.2.3 From 656886030b294225b92379ef14306b2e5b9a3f04 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Wed, 19 Jan 2022 14:17:54 +0000 Subject: Refactor divergence mask handling in subgroup tests (#1379) This changes compilation of subgroup test kernels so that a separate compilation is no longer performed for each divergence mask value. The divergence mask is now passed as a kernel argument. This also fixes all subgroup_functions_non_uniform_arithmetic testing and the sub_group_elect and sub_group_any/all_equal subtests of the subgroup_functions_non_uniform_vote test to use the correct order of vector components for GPUs with a subgroup size greater than 64. The conversion of divergence mask bitsets to uint4 vectors has been corrected to match code comments in WorkGroupParams::load_masks() in test_conformance/subgroups/subhelpers.h. 
Signed-off-by: Stuart Brady --- test_conformance/subgroups/subhelpers.h | 172 ++++++++++----------- .../test_subgroup_non_uniform_arithmetic.cpp | 8 +- .../subgroups/test_subgroup_non_uniform_vote.cpp | 14 +- 3 files changed, 96 insertions(+), 98 deletions(-) diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 30105a57..aa4abc96 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -34,12 +34,24 @@ extern MTdata gMTdata; typedef std::bitset<128> bs128; extern cl_half_rounding_mode g_rounding_mode; +static cl_uint4 bs128_to_cl_uint4(bs128 v) +{ + bs128 bs128_ffffffff = 0xffffffffU; + + cl_uint4 r; + r.s0 = ((v >> 0) & bs128_ffffffff).to_ulong(); + r.s1 = ((v >> 32) & bs128_ffffffff).to_ulong(); + r.s2 = ((v >> 64) & bs128_ffffffff).to_ulong(); + r.s3 = ((v >> 96) & bs128_ffffffff).to_ulong(); + + return r; +} + struct WorkGroupParams { - WorkGroupParams(size_t gws, size_t lws, - bool use_mask = false) + WorkGroupParams(size_t gws, size_t lws, int dm_arg = -1) : global_workgroup_size(gws), local_workgroup_size(lws), - use_masks(use_mask) + divergence_mask_arg(dm_arg) { subgroup_size = 0; work_items_mask = 0; @@ -54,7 +66,7 @@ struct WorkGroupParams int dynsc; bool use_core_subgroups; std::vector all_work_item_masks; - bool use_masks; + int divergence_mask_arg; void save_kernel_source(const std::string &source, std::string name = "") { if (name == "") @@ -84,7 +96,7 @@ private: std::map kernel_function_name; void load_masks() { - if (use_masks) + if (divergence_mask_arg != -1) { // 1 in string will be set 1, 0 will be set 0 bs128 mask_0xf0f0f0f0("11110000111100001111000011110000" @@ -1375,50 +1387,10 @@ static int run_kernel(cl_context context, cl_command_queue queue, // Driver for testing a single built in function template struct test { - static test_status mrun(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - const char *kname, 
const char *src, - WorkGroupParams test_params) - { - Fns::log_test(test_params, ""); - - test_status combined_error = TEST_SKIPPED_ITSELF; - for (auto &mask : test_params.all_work_item_masks) - { - test_params.work_items_mask = mask; - test_status error = do_run(device, context, queue, num_elements, - kname, src, test_params); - - if (error == TEST_FAIL - || (error == TEST_PASS && combined_error != TEST_FAIL)) - combined_error = error; - } - - if (combined_error == TEST_PASS) - { - Fns::log_test(test_params, " passed"); - } - return combined_error; - }; - static int run(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, const char *kname, - const char *src, WorkGroupParams test_params) - { - Fns::log_test(test_params, ""); - - int error = do_run(device, context, queue, num_elements, kname, src, - test_params); - - if (error == TEST_PASS) - { - Fns::log_test(test_params, " passed"); - } - return error; - }; - static test_status do_run(cl_device_id device, cl_context context, - cl_command_queue queue, int num_elements, - const char *kname, const char *src, - WorkGroupParams test_params) + static test_status run(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements, + const char *kname, const char *src, + WorkGroupParams test_params) { size_t tmp; cl_int error; @@ -1436,25 +1408,8 @@ template struct test std::vector mapout; mapout.resize(local); std::stringstream kernel_sstr; - if (test_params.use_masks) - { - // Prapare uint4 type to store bitmask on kernel OpenCL C side - // To keep order the first characet in string is the lowest bit - // there was a need to give such offset to bitset constructor - // (first highest offset = 96) - std::bitset<32> bits_1_32(test_params.work_items_mask.to_string(), - 96, 32); - std::bitset<32> bits_33_64(test_params.work_items_mask.to_string(), - 64, 32); - std::bitset<32> bits_65_96(test_params.work_items_mask.to_string(), - 32, 32); - std::bitset<32> 
bits_97_128(test_params.work_items_mask.to_string(), - 0, 32); - kernel_sstr << "global uint4 work_item_mask_vector = (uint4)(0b" - << bits_1_32 << ",0b" << bits_33_64 << ",0b" - << bits_65_96 << ",0b" << bits_97_128 << ");\n"; - } + Fns::log_test(test_params, ""); kernel_sstr << "#define NR_OF_ACTIVE_WORK_ITEMS "; kernel_sstr << NR_OF_ACTIVE_WORK_ITEMS << "\n"; @@ -1563,6 +1518,18 @@ template struct test idata.resize(input_array_size); odata.resize(output_array_size); + if (test_params.divergence_mask_arg != -1) + { + cl_uint4 mask_vector; + mask_vector.x = 0xffffffffU; + mask_vector.y = 0xffffffffU; + mask_vector.z = 0xffffffffU; + mask_vector.w = 0xffffffffU; + error = clSetKernelArg(kernel, test_params.divergence_mask_arg, + sizeof(cl_uint4), &mask_vector); + test_error_fail(error, "Unable to set divergence mask argument"); + } + // Run the kernel once on zeroes to get the map memset(idata.data(), 0, input_array_size * sizeof(Ty)); error = run_kernel(context, queue, kernel, global, local, idata.data(), @@ -1572,25 +1539,65 @@ template struct test test_error_fail(error, "Running kernel first time failed"); // Generate the desired input for the kernel - test_params.subgroup_size = subgroup_size; Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params); - error = run_kernel(context, queue, kernel, global, local, idata.data(), + + test_status combined_status; + + if (test_params.divergence_mask_arg != -1) + { + combined_status = TEST_SKIPPED_ITSELF; + + for (auto &mask : test_params.all_work_item_masks) + { + test_params.work_items_mask = mask; + cl_uint4 mask_vector = bs128_to_cl_uint4(mask); + clSetKernelArg(kernel, test_params.divergence_mask_arg, + sizeof(cl_uint4), &mask_vector); + error = run_kernel(context, queue, kernel, global, local, + idata.data(), input_array_size * sizeof(Ty), + sgmap.data(), global * sizeof(cl_int4), + odata.data(), output_array_size * sizeof(Ty), + TSIZE * sizeof(Ty)); + test_error_fail(error, "Running kernel second time 
failed"); + + // Check the result + test_status status = + Fns::chk(idata.data(), odata.data(), mapin.data(), + mapout.data(), sgmap.data(), test_params); + + if (status == TEST_FAIL + || (status == TEST_PASS && combined_status != TEST_FAIL)) + combined_status = status; + + if (status == TEST_FAIL) break; + } + } + else + { + error = + run_kernel(context, queue, kernel, global, local, idata.data(), input_array_size * sizeof(Ty), sgmap.data(), global * sizeof(cl_int4), odata.data(), output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); - test_error_fail(error, "Running kernel second time failed"); + test_error_fail(error, "Running kernel second time failed"); - // Check the result - test_status status = Fns::chk(idata.data(), odata.data(), mapin.data(), - mapout.data(), sgmap.data(), test_params); + // Check the result + combined_status = + Fns::chk(idata.data(), odata.data(), mapin.data(), + mapout.data(), sgmap.data(), test_params); + } // Detailed failure and skip messages should be logged by Fns::gen // and Fns::chk. 
- if (status == TEST_FAIL) + if (combined_status == TEST_PASS) + { + Fns::log_test(test_params, " passed"); + } + else if (combined_status == TEST_FAIL) { test_fail("Data verification failed\n"); } - return status; + return combined_status; } }; @@ -1643,18 +1650,9 @@ struct RunTestForType std::regex_replace(test_params_.get_kernel_source(function_name), std::regex("\\%s"), function_name); std::string kernel_name = "test_" + function_name; - if (test_params_.all_work_item_masks.size() > 0) - { - error = test::mrun(device_, context_, queue_, num_elements_, - kernel_name.c_str(), source.c_str(), - test_params_); - } - else - { - error = test::run(device_, context_, queue_, num_elements_, - kernel_name.c_str(), source.c_str(), - test_params_); - } + error = + test::run(device_, context_, queue_, num_elements_, + kernel_name.c_str(), source.c_str(), test_params_); // If we return TEST_SKIPPED_ITSELF here, then an entire suite may be // reported as having been skipped even if some tests within it diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp index 5ab45222..02fc507b 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_arithmetic.cpp @@ -21,7 +21,7 @@ namespace { std::string sub_group_non_uniform_arithmetic_source = R"( - __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { int gid = get_global_id(0); XY(xy,gid); uint subgroup_local_id = get_sub_group_local_id(); @@ -32,9 +32,9 @@ std::string sub_group_non_uniform_arithmetic_source = R"( } else if(subgroup_local_id < 64) { work_item_mask = work_item_mask_vector.y; } else if(subgroup_local_id < 96) { - work_item_mask = work_item_mask_vector.w; - } else if(subgroup_local_id < 128) { 
work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; } if (elect_work_item & work_item_mask){ out[gid] = %s(in[gid]); @@ -136,7 +136,7 @@ int test_subgroup_functions_non_uniform_arithmetic(cl_device_id device, constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size, true); + WorkGroupParams test_params(global_work_size, local_work_size, 3); test_params.save_kernel_source(sub_group_non_uniform_arithmetic_source); RunTestForType rft(device, context, queue, num_elements, test_params); diff --git a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp index 3f0985e2..3be1ba30 100644 --- a/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp +++ b/test_conformance/subgroups/test_subgroup_non_uniform_vote.cpp @@ -202,7 +202,7 @@ template struct VOTE }; std::string sub_group_elect_source = R"( - __kernel void test_sub_group_elect(const __global Type *in, __global int4 *xy, __global Type *out) { + __kernel void test_sub_group_elect(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { int gid = get_global_id(0); XY(xy,gid); uint subgroup_local_id = get_sub_group_local_id(); @@ -213,9 +213,9 @@ std::string sub_group_elect_source = R"( } else if(subgroup_local_id < 64) { work_item_mask = work_item_mask_vector.y; } else if(subgroup_local_id < 96) { - work_item_mask = work_item_mask_vector.w; - } else if(subgroup_local_id < 128) { work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; } if (elect_work_item & work_item_mask){ out[gid] = sub_group_elect(); @@ -224,7 +224,7 @@ std::string sub_group_elect_source = R"( )"; std::string sub_group_non_uniform_any_all_all_equal_source = R"( - __kernel void test_%s(const __global Type *in, 
__global int4 *xy, __global Type *out) { + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { int gid = get_global_id(0); XY(xy,gid); uint subgroup_local_id = get_sub_group_local_id(); @@ -235,9 +235,9 @@ std::string sub_group_non_uniform_any_all_all_equal_source = R"( } else if(subgroup_local_id < 64) { work_item_mask = work_item_mask_vector.y; } else if(subgroup_local_id < 96) { - work_item_mask = work_item_mask_vector.w; - } else if(subgroup_local_id < 128) { work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; } if (elect_work_item & work_item_mask){ out[gid] = %s(in[gid]); @@ -267,7 +267,7 @@ int test_subgroup_functions_non_uniform_vote(cl_device_id device, constexpr size_t global_work_size = 170; constexpr size_t local_work_size = 64; - WorkGroupParams test_params(global_work_size, local_work_size, true); + WorkGroupParams test_params(global_work_size, local_work_size, 3); test_params.save_kernel_source( sub_group_non_uniform_any_all_all_equal_source); test_params.save_kernel_source(sub_group_elect_source, "sub_group_elect"); -- cgit v1.2.3 From 60471a520804fbd6611acd1c48f35549bb512deb Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Fri, 28 Jan 2022 09:15:44 +0000 Subject: Improve testing of sub_group_ballot (#1382) Signed-off-by: Stuart Brady --- test_common/harness/mt19937.cpp | 2 + test_common/harness/mt19937.h | 3 + test_conformance/subgroups/subhelpers.h | 6 + .../subgroups/test_subgroup_ballot.cpp | 191 +++++++++++++++------ 4 files changed, 147 insertions(+), 55 deletions(-) diff --git a/test_common/harness/mt19937.cpp b/test_common/harness/mt19937.cpp index c32d9bac..f5665deb 100644 --- a/test_common/harness/mt19937.cpp +++ b/test_common/harness/mt19937.cpp @@ -277,3 +277,5 @@ double genrand_res53(MTdata d) unsigned long a = genrand_int32(d) >> 5, b = genrand_int32(d) >> 6; return (a * 67108864.0 + b) * (1.0 / 
9007199254740992.0); } + +bool genrand_bool(MTdata d) { return ((cl_uint)genrand_int32(d) & 1); } diff --git a/test_common/harness/mt19937.h b/test_common/harness/mt19937.h index 35c84933..98eec843 100644 --- a/test_common/harness/mt19937.h +++ b/test_common/harness/mt19937.h @@ -90,6 +90,9 @@ double genrand_res53(MTdata /*data*/); #ifdef __cplusplus +/* generates a random boolean */ +bool genrand_bool(MTdata /*data*/); + #include struct MTdataHolder diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index aa4abc96..153045d0 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -34,6 +34,12 @@ extern MTdata gMTdata; typedef std::bitset<128> bs128; extern cl_half_rounding_mode g_rounding_mode; +static bs128 cl_uint4_to_bs128(cl_uint4 v) +{ + return bs128(v.s0) | (bs128(v.s1) << 32) | (bs128(v.s2) << 64) + | (bs128(v.s3) << 96); +} + static cl_uint4 bs128_to_cl_uint4(bs128 v) { bs128 bs128_ffffffff = 0xffffffffU; diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 837988ea..4148707e 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -31,45 +31,93 @@ template struct BALLOT static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - // no work here int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; + int sb_number = (lws + sbs - 1) / sbs; int non_uniform_size = gws % lws; + int wg_number = gws / lws; + wg_number = non_uniform_size ? 
wg_number + 1 : wg_number; + int last_subgroup_size = 0; + + for (int wg_id = 0; wg_id < wg_number; ++wg_id) + { // for each work_group + if (non_uniform_size && wg_id == wg_number - 1) + { + set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws, + last_subgroup_size); + } + for (int sb_id = 0; sb_id < sb_number; ++sb_id) + { // for each subgroup + int wg_offset = sb_id * sbs; + int current_sbs; + if (last_subgroup_size && sb_id == sb_number - 1) + { + current_sbs = last_subgroup_size; + } + else + { + current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; + } + + for (int wi_id = 0; wi_id < current_sbs; wi_id++) + { + cl_uint v; + if (genrand_bool(gMTdata)) + { + v = genrand_bool(gMTdata); + } + else if (genrand_bool(gMTdata)) + { + v = 1U << ((genrand_int32(gMTdata) % 31) + 1); + } + else + { + v = genrand_int32(gMTdata); + } + cl_uint4 v4 = { v, 0, 0, 0 }; + t[wi_id + wg_offset] = v4; + } + } + // Now map into work group using map from device + for (int wi_id = 0; wi_id < lws; ++wi_id) + { + x[wi_id] = t[wi_id]; + } + x += lws; + m += 4 * lws; + } } static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, const WorkGroupParams &test_params) { - int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; int sb_number = (lws + sbs - 1) / sbs; - int current_sbs = 0; - cl_uint expected_result, device_result; int non_uniform_size = gws % lws; int wg_number = gws / lws; wg_number = non_uniform_size ? 
wg_number + 1 : wg_number; int last_subgroup_size = 0; - for (wg_id = 0; wg_id < wg_number; ++wg_id) + for (int wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group if (non_uniform_size && wg_id == wg_number - 1) { set_last_workgroup_params(non_uniform_size, sb_number, sbs, lws, last_subgroup_size); } - - for (wi_id = 0; wi_id < lws; ++wi_id) + for (int wi_id = 0; wi_id < lws; ++wi_id) { // inside the work_group - // read device outputs for work_group - my[wi_id] = y[wi_id]; + mx[wi_id] = x[wi_id]; // read host inputs for work_group + my[wi_id] = y[wi_id]; // read device outputs for work_group } - for (sb_id = 0; sb_id < sb_number; ++sb_id) + for (int sb_id = 0; sb_id < sb_number; ++sb_id) { // for each subgroup int wg_offset = sb_id * sbs; + int current_sbs; if (last_subgroup_size && sb_id == sb_number - 1) { current_sbs = last_subgroup_size; @@ -78,25 +126,54 @@ template struct BALLOT { current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; } - for (wi_id = 0; wi_id < current_sbs; ++wi_id) + + bs128 expected_result_bs = 0; + + std::set active_work_items; + for (int wi_id = 0; wi_id < current_sbs; ++wi_id) { - device_result = my[wg_offset + wi_id]; - expected_result = 1; - if (!compare(device_result, expected_result)) + if (test_params.work_items_mask.test(wi_id)) + { + bool predicate = (mx[wg_offset + wi_id].s0 != 0); + expected_result_bs |= (bs128(predicate) << wi_id); + active_work_items.insert(wi_id); + } + } + if (active_work_items.empty()) + { + continue; + } + + cl_uint4 expected_result = + bs128_to_cl_uint4(expected_result_bs); + for (const int &active_work_item : active_work_items) + { + int wi_id = active_work_item; + + cl_uint4 device_result = my[wg_offset + wi_id]; + bs128 device_result_bs = cl_uint4_to_bs128(device_result); + + if (device_result_bs != expected_result_bs) { log_error( "ERROR: sub_group_ballot mismatch for local id " - "%d in sub group %d in group %d obtained %d, " - "expected %d\n", - wi_id, sb_id, wg_id, 
device_result, - expected_result); + "%d in sub group %d in group %d obtained {%d, %d, " + "%d, %d}, expected {%d, %d, %d, %d}\n", + wi_id, sb_id, wg_id, device_result.s0, + device_result.s1, device_result.s2, + device_result.s3, expected_result.s0, + expected_result.s1, expected_result.s2, + expected_result.s3); return TEST_FAIL; } } } + + x += lws; y += lws; m += 4 * lws; } + return TEST_PASS; } }; @@ -724,27 +801,26 @@ __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type } )"; std::string sub_group_ballot_source = R"( -__kernel void test_sub_group_ballot(const __global Type *in, __global int4 *xy, __global Type *out) { - uint4 full_ballot = sub_group_ballot(1); - uint divergence_mask; - uint4 partial_ballot; +__kernel void test_sub_group_ballot(const __global Type *in, __global int4 *xy, __global Type *out, uint4 work_item_mask_vector) { uint gid = get_global_id(0); XY(xy,gid); - if (get_sub_group_local_id() & 1) { - divergence_mask = 0xaaaaaaaa; - partial_ballot = sub_group_ballot(1); - } else { - divergence_mask = 0x55555555; - partial_ballot = sub_group_ballot(1); + uint subgroup_local_id = get_sub_group_local_id(); + uint elect_work_item = 1 << (subgroup_local_id % 32); + uint work_item_mask; + if (subgroup_local_id < 32) { + work_item_mask = work_item_mask_vector.x; + } else if(subgroup_local_id < 64) { + work_item_mask = work_item_mask_vector.y; + } else if(subgroup_local_id < 96) { + work_item_mask = work_item_mask_vector.z; + } else if(subgroup_local_id < 128) { + work_item_mask = work_item_mask_vector.w; } - size_t lws = get_local_size(0); - uint4 masked_ballot = full_ballot; - masked_ballot.x &= divergence_mask; - masked_ballot.y &= divergence_mask; - masked_ballot.z &= divergence_mask; - masked_ballot.w &= divergence_mask; - out[gid] = all(masked_ballot == partial_ballot); - + uint4 value = (uint4)(0, 0, 0, 0); + if (elect_work_item & work_item_mask) { + value = sub_group_ballot(in[gid].s0); + } + out[gid] = value; } )"; 
std::string sub_group_inverse_ballot_source = R"( @@ -952,42 +1028,47 @@ int test_subgroup_functions_ballot(cl_device_id device, cl_context context, error |= rft.run_impl>( "get_sub_group_lt_mask"); - // ballot functions - WorkGroupParams test_params_ballot(global_work_size, local_work_size); - test_params_ballot.save_kernel_source( - sub_group_ballot_bit_scan_find_source); - test_params_ballot.save_kernel_source(sub_group_ballot_source, - "sub_group_ballot"); - test_params_ballot.save_kernel_source(sub_group_inverse_ballot_source, - "sub_group_inverse_ballot"); - test_params_ballot.save_kernel_source(sub_group_ballot_bit_extract_source, - "sub_group_ballot_bit_extract"); + // sub_group_ballot function + WorkGroupParams test_params_ballot(global_work_size, local_work_size, 3); + test_params_ballot.save_kernel_source(sub_group_ballot_source); RunTestForType rft_ballot(device, context, queue, num_elements, test_params_ballot); - error |= rft_ballot.run_impl>("sub_group_ballot"); error |= - rft_ballot.run_impl>( + rft_ballot.run_impl>("sub_group_ballot"); + + // ballot arithmetic functions + WorkGroupParams test_params_arith(global_work_size, local_work_size); + test_params_arith.save_kernel_source(sub_group_ballot_bit_scan_find_source); + test_params_arith.save_kernel_source(sub_group_inverse_ballot_source, + "sub_group_inverse_ballot"); + test_params_arith.save_kernel_source(sub_group_ballot_bit_extract_source, + "sub_group_ballot_bit_extract"); + RunTestForType rft_arith(device, context, queue, num_elements, + test_params_arith); + error |= + rft_arith.run_impl>( "sub_group_inverse_ballot"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_BIT_EXTRACT>( "sub_group_ballot_bit_extract"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( "sub_group_ballot_bit_count"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( 
"sub_group_ballot_inclusive_scan"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( "sub_group_ballot_exclusive_scan"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( "sub_group_ballot_find_lsb"); - error |= rft_ballot.run_impl< + error |= rft_arith.run_impl< cl_uint4, BALLOT_COUNT_SCAN_FIND>( "sub_group_ballot_find_msb"); + return error; } -- cgit v1.2.3 From 6b14d408dc8cc0a05bca554e8b43d269fba179d0 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Thu, 10 Feb 2022 06:24:33 +0000 Subject: Improve testing of kernel arg info in pipe_info test (#1326) The test now checks that CL_KERNEL_ARG_INFO_NOT_AVAILABLE is returned when calling clGetKernelArgInfo() with offline compilation modes. The correct function name is printed if clGetKernelArgInfo() fails when using online compilation (and not "clSetKernelArgInfo()"). When using online compilation, if the actual arg type is not as expected, the actual arg type is now logged, and the return value is now TEST_FAIL (-1) as per other failures (and not 1). All other test pass/fail values used in the test now use TEST_PASS and TEST_FAIL instead of 0 and -1 literals. An unnecessary cast of pipe_kernel_code has been removed. Signed-off-by: Stuart Brady --- test_conformance/pipes/test_pipe_info.cpp | 40 ++++++++++++++++++------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/test_conformance/pipes/test_pipe_info.cpp b/test_conformance/pipes/test_pipe_info.cpp index 7543c6cd..e7b486db 100644 --- a/test_conformance/pipes/test_pipe_info.cpp +++ b/test_conformance/pipes/test_pipe_info.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// #include "procs.h" +#include "harness/parseParameters.h" const char* pipe_kernel_code = { "__kernel void pipe_kernel(__write_only pipe int out_pipe)\n" @@ -39,8 +40,7 @@ int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue if (pipe_width != returnVal) { - log_error("Error in clGetPipeInfo() check of pipe packet size\n"); - return -1; + test_fail("Error in clGetPipeInfo() check of pipe packet size\n"); } else { @@ -52,29 +52,37 @@ int test_pipe_info( cl_device_id deviceID, cl_context context, cl_command_queue if(pipe_depth != returnVal) { - log_error( "Error in clGetPipeInfo() check of pipe max packets\n" ); - return -1; + test_fail("Error in clGetPipeInfo() check of pipe max packets\n"); } else { log_info( " CL_PIPE_MAX_PACKETS passed.\n" ); } - err = create_single_kernel_helper_with_build_options(context, &program, &kernel, 1, (const char**)&pipe_kernel_code, "pipe_kernel", "-cl-std=CL2.0 -cl-kernel-arg-info"); - test_error_ret(err, " Error creating program", -1); + err = create_single_kernel_helper_with_build_options( + context, &program, &kernel, 1, &pipe_kernel_code, "pipe_kernel", + "-cl-std=CL2.0 -cl-kernel-arg-info"); + test_error_fail(err, "Error creating program"); cl_kernel_arg_type_qualifier arg_type_qualifier = 0; - cl_kernel_arg_type_qualifier expected_type_qualifier = CL_KERNEL_ARG_TYPE_PIPE; - err = clGetKernelArgInfo( kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER, sizeof(arg_type_qualifier), &arg_type_qualifier, NULL ); - test_error_ret(err, " clSetKernelArgInfo failed", -1); - err = (arg_type_qualifier != expected_type_qualifier); - - if(err) + err = clGetKernelArgInfo(kernel, 0, CL_KERNEL_ARG_TYPE_QUALIFIER, + sizeof(arg_type_qualifier), &arg_type_qualifier, + NULL); + if (gCompilationMode == kOnline) { - print_error(err, "ERROR: Bad type qualifier\n"); - return -1; + test_error_fail(err, "clGetKernelArgInfo failed"); + if (arg_type_qualifier != CL_KERNEL_ARG_TYPE_PIPE) + { + test_fail("ERROR: Incorrect type qualifier: 
%i\n", + arg_type_qualifier); + } + } + else + { + test_failure_error_ret(err, CL_KERNEL_ARG_INFO_NOT_AVAILABLE, + "clGetKernelArgInfo error not as expected", + TEST_FAIL); } - return err; - + return TEST_PASS; } -- cgit v1.2.3 From 2d93b122c3078cd67a0528ad9e791dbcadaf03d6 Mon Sep 17 00:00:00 2001 From: Jim Lewis Date: Tue, 22 Feb 2022 10:49:35 -0600 Subject: Sync submission_details with conformance doc v26 (#1389) Add "Patches" field --- test_conformance/submission_details_template.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test_conformance/submission_details_template.txt b/test_conformance/submission_details_template.txt index 9d276a62..ff624837 100644 --- a/test_conformance/submission_details_template.txt +++ b/test_conformance/submission_details_template.txt @@ -81,6 +81,12 @@ Platform Version: # Tests version: +# Commit SHAs (7-digit) of any cherry-picked patches subsequent to tagged +# version. Any patches included must apply without conflicts to the tagged +# version in the order listed. +# +Patches: + # Implementations that support cl_khr_icd are required to use a loader to run # the tests and document the loader that was used. 
# -- cgit v1.2.3 From 279803ababb0495843c05103a8d4a2e4a1fdf017 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Wed, 2 Mar 2022 13:25:53 +0000 Subject: Refactor kernel execution in subgroup tests (#1391) Signed-off-by: Stuart Brady --- test_conformance/subgroups/subhelpers.h | 212 ++++++++++++++++++-------------- 1 file changed, 123 insertions(+), 89 deletions(-) diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 153045d0..b88d2278 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -1322,73 +1322,129 @@ inline bool compare_ordered(const subgroups::cl_half &lhs, const int &rhs) return cl_half_to_float(lhs.data) == rhs; } -// Run a test kernel to compute the result of a built-in on an input -static int run_kernel(cl_context context, cl_command_queue queue, - cl_kernel kernel, size_t global, size_t local, - void *idata, size_t isize, void *mdata, size_t msize, - void *odata, size_t osize, size_t tsize = 0) -{ - clMemWrapper in; - clMemWrapper xy; - clMemWrapper out; - clMemWrapper tmp; - int error; +template class KernelExecutor { +public: + KernelExecutor(cl_context c, cl_command_queue q, cl_kernel k, size_t g, + size_t l, Ty *id, size_t is, Ty *mid, Ty *mod, cl_int *md, + size_t ms, Ty *od, size_t os, size_t ts = 0) + : context(c), queue(q), kernel(k), global(g), local(l), idata(id), + isize(is), mapin_data(mid), mapout_data(mod), mdata(md), msize(ms), + odata(od), osize(os), tsize(ts) + { + has_status = false; + run_failed = false; + } + cl_context context; + cl_command_queue queue; + cl_kernel kernel; + size_t global; + size_t local; + Ty *idata; + size_t isize; + Ty *mapin_data; + Ty *mapout_data; + cl_int *mdata; + size_t msize; + Ty *odata; + size_t osize; + size_t tsize; + bool run_failed; - in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error); - test_error(error, "clCreateBuffer failed"); +private: + bool has_status; + test_status status; - xy 
= clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error); - test_error(error, "clCreateBuffer failed"); +public: + // Run a test kernel to compute the result of a built-in on an input + int run() + { + clMemWrapper in; + clMemWrapper xy; + clMemWrapper out; + clMemWrapper tmp; + int error; - out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error); - test_error(error, "clCreateBuffer failed"); + in = clCreateBuffer(context, CL_MEM_READ_ONLY, isize, NULL, &error); + test_error(error, "clCreateBuffer failed"); - if (tsize) - { - tmp = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, - tsize, NULL, &error); + xy = clCreateBuffer(context, CL_MEM_WRITE_ONLY, msize, NULL, &error); test_error(error, "clCreateBuffer failed"); - } - error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in); - test_error(error, "clSetKernelArg failed"); + out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, osize, NULL, &error); + test_error(error, "clCreateBuffer failed"); - error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy); - test_error(error, "clSetKernelArg failed"); + if (tsize) + { + tmp = clCreateBuffer(context, + CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, + tsize, NULL, &error); + test_error(error, "clCreateBuffer failed"); + } - error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out); - test_error(error, "clSetKernelArg failed"); + error = clSetKernelArg(kernel, 0, sizeof(in), (void *)&in); + test_error(error, "clSetKernelArg failed"); - if (tsize) - { - error = clSetKernelArg(kernel, 3, sizeof(tmp), (void *)&tmp); + error = clSetKernelArg(kernel, 1, sizeof(xy), (void *)&xy); test_error(error, "clSetKernelArg failed"); - } - error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, NULL, - NULL); - test_error(error, "clEnqueueWriteBuffer failed"); + error = clSetKernelArg(kernel, 2, sizeof(out), (void *)&out); + test_error(error, "clSetKernelArg failed"); + + if (tsize) + { + error = clSetKernelArg(kernel, 3, 
sizeof(tmp), (void *)&tmp); + test_error(error, "clSetKernelArg failed"); + } + + error = clEnqueueWriteBuffer(queue, in, CL_FALSE, 0, isize, idata, 0, + NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); - error = clEnqueueWriteBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, - NULL); - test_error(error, "clEnqueueWriteBuffer failed"); - error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, - NULL, NULL); - test_error(error, "clEnqueueNDRangeKernel failed"); + error = clEnqueueWriteBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, + NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, + 0, NULL, NULL); + test_error(error, "clEnqueueNDRangeKernel failed"); - error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, NULL, - NULL); - test_error(error, "clEnqueueReadBuffer failed"); + error = clEnqueueReadBuffer(queue, xy, CL_FALSE, 0, msize, mdata, 0, + NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); - error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, NULL, - NULL); - test_error(error, "clEnqueueReadBuffer failed"); + error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, osize, odata, 0, + NULL, NULL); + test_error(error, "clEnqueueReadBuffer failed"); - error = clFinish(queue); - test_error(error, "clFinish failed"); + error = clFinish(queue); + test_error(error, "clFinish failed"); - return error; -} + return error; + } + + test_status run_and_check(const WorkGroupParams &test_params) + { + cl_int error = run(); + if (error != CL_SUCCESS) + { + print_error(error, "Failed to run subgroup test kernel"); + status = TEST_FAIL; + run_failed = true; + return status; + } + + test_status tmp_status = + Fns::chk(idata, odata, mapin_data, mapout_data, mdata, test_params); + + if (!has_status || tmp_status == TEST_FAIL + || (tmp_status == TEST_PASS && status != TEST_FAIL)) + { + status = tmp_status; + 
has_status = true; + } + + return status; + } +}; // Driver for testing a single built in function template struct test @@ -1536,74 +1592,52 @@ template struct test test_error_fail(error, "Unable to set divergence mask argument"); } + KernelExecutor executor( + context, queue, kernel, global, local, idata.data(), + input_array_size * sizeof(Ty), mapin.data(), mapout.data(), + sgmap.data(), global * sizeof(cl_int4), odata.data(), + output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); + // Run the kernel once on zeroes to get the map memset(idata.data(), 0, input_array_size * sizeof(Ty)); - error = run_kernel(context, queue, kernel, global, local, idata.data(), - input_array_size * sizeof(Ty), sgmap.data(), - global * sizeof(cl_int4), odata.data(), - output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); + error = executor.run(); test_error_fail(error, "Running kernel first time failed"); // Generate the desired input for the kernel test_params.subgroup_size = subgroup_size; Fns::gen(idata.data(), mapin.data(), sgmap.data(), test_params); - test_status combined_status; + test_status status; if (test_params.divergence_mask_arg != -1) { - combined_status = TEST_SKIPPED_ITSELF; - for (auto &mask : test_params.all_work_item_masks) { test_params.work_items_mask = mask; cl_uint4 mask_vector = bs128_to_cl_uint4(mask); clSetKernelArg(kernel, test_params.divergence_mask_arg, sizeof(cl_uint4), &mask_vector); - error = run_kernel(context, queue, kernel, global, local, - idata.data(), input_array_size * sizeof(Ty), - sgmap.data(), global * sizeof(cl_int4), - odata.data(), output_array_size * sizeof(Ty), - TSIZE * sizeof(Ty)); - test_error_fail(error, "Running kernel second time failed"); - - // Check the result - test_status status = - Fns::chk(idata.data(), odata.data(), mapin.data(), - mapout.data(), sgmap.data(), test_params); - - if (status == TEST_FAIL - || (status == TEST_PASS && combined_status != TEST_FAIL)) - combined_status = status; + + status = 
executor.run_and_check(test_params); if (status == TEST_FAIL) break; } } else { - error = - run_kernel(context, queue, kernel, global, local, idata.data(), - input_array_size * sizeof(Ty), sgmap.data(), - global * sizeof(cl_int4), odata.data(), - output_array_size * sizeof(Ty), TSIZE * sizeof(Ty)); - test_error_fail(error, "Running kernel second time failed"); - - // Check the result - combined_status = - Fns::chk(idata.data(), odata.data(), mapin.data(), - mapout.data(), sgmap.data(), test_params); + status = executor.run_and_check(test_params); } - // Detailed failure and skip messages should be logged by Fns::gen - // and Fns::chk. - if (combined_status == TEST_PASS) + // Detailed failure and skip messages should be logged by + // run_and_check. + if (status == TEST_PASS) { Fns::log_test(test_params, " passed"); } - else if (combined_status == TEST_FAIL) + else if (!executor.run_failed && status == TEST_FAIL) { test_fail("Data verification failed\n"); } - return combined_status; + return status; } }; -- cgit v1.2.3 From d36196b662fb64d5610e027d836985bfc87ae07b Mon Sep 17 00:00:00 2001 From: Alastair Murray Date: Wed, 2 Mar 2022 18:27:52 +0000 Subject: Update format script and drop Travis badge for branch rename (#1393) `master` is now `main`, so update `check-format.sh` accordingly. Also completely drop the Travis badge as we now use GitHub actions. There is no replacement badge as the current action is pre-submission, not post-submission. 
--- README.md | 2 +- check-format.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b2d825fc..796f7c86 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ -# OpenCL-CTS [![Build Status](https://api.travis-ci.org/KhronosGroup/OpenCL-CTS.svg?branch=master)](https://travis-ci.org/KhronosGroup/OpenCL-CTS/branches) +# OpenCL-CTS The OpenCL Conformance Tests diff --git a/check-format.sh b/check-format.sh index 7de2bd2c..be8f9d78 100755 --- a/check-format.sh +++ b/check-format.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash -# Arg used to specify non-'origin/master' comparison branch -ORIGIN_BRANCH=${1:-"origin/master"} +# Arg used to specify non-'origin/main' comparison branch +ORIGIN_BRANCH=${1:-"origin/main"} CLANG_BINARY=${2:-"`which clang-format-9`"} # Run git-clang-format to check for violations -- cgit v1.2.3 From e437acd908b435e65655ae31e210511f434e108c Mon Sep 17 00:00:00 2001 From: Jeremy Kemp Date: Wed, 2 Mar 2022 18:28:12 +0000 Subject: Added simple test for CL_DEVICE_PRINTF_BUFFER_SIZE. (#1386) * Added simple test for CL_DEVICE_PRINTF_BUFFER_SIZE. * Clang format fix. --- test_conformance/printf/test_printf.cpp | 134 ++++++++++++++++---------------- 1 file changed, 68 insertions(+), 66 deletions(-) diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp index 2b804e40..12ff6535 100644 --- a/test_conformance/printf/test_printf.cpp +++ b/test_conformance/printf/test_printf.cpp @@ -825,73 +825,75 @@ int test_address_space_4(cl_device_id deviceID, cl_context context, cl_command_q return doTest(gQueue, gContext, TYPE_ADDRESS_SPACE, 4, deviceID); } +int test_buffer_size(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ + size_t printf_buff_size = 0; + const size_t printf_buff_size_req = !gIsEmbedded ? 
(1024 * 1024UL) : 1024UL; + const size_t config_size = sizeof(printf_buff_size); + cl_int err = CL_SUCCESS; + + err = clGetDeviceInfo(deviceID, CL_DEVICE_PRINTF_BUFFER_SIZE, config_size, + &printf_buff_size, NULL); + if (err != CL_SUCCESS) + { + log_error("Unable to query CL_DEVICE_PRINTF_BUFFER_SIZE"); + return TEST_FAIL; + } + + if (printf_buff_size < printf_buff_size_req) + { + log_error("CL_DEVICE_PRINTF_BUFFER_SIZE does not meet requirements"); + return TEST_FAIL; + } + + return TEST_PASS; +} + test_definition test_list[] = { - ADD_TEST( int_0 ), - ADD_TEST( int_1 ), - ADD_TEST( int_2 ), - ADD_TEST( int_3 ), - ADD_TEST( int_4 ), - ADD_TEST( int_5 ), - ADD_TEST( int_6 ), - ADD_TEST( int_7 ), - ADD_TEST( int_8 ), - - ADD_TEST( float_0 ), - ADD_TEST( float_1 ), - ADD_TEST( float_2 ), - ADD_TEST( float_3 ), - ADD_TEST( float_4 ), - ADD_TEST( float_5 ), - ADD_TEST( float_6 ), - ADD_TEST( float_7 ), - ADD_TEST( float_8 ), - ADD_TEST( float_9 ), - ADD_TEST( float_10 ), - ADD_TEST( float_11 ), - ADD_TEST( float_12 ), - ADD_TEST( float_13 ), - ADD_TEST( float_14 ), - ADD_TEST( float_15 ), - ADD_TEST( float_16 ), - ADD_TEST( float_17 ), - - ADD_TEST( float_limits_0 ), - ADD_TEST( float_limits_1 ), - ADD_TEST( float_limits_2 ), - - ADD_TEST( octal_0 ), - ADD_TEST( octal_1 ), - ADD_TEST( octal_2 ), - ADD_TEST( octal_3 ), - - ADD_TEST( unsigned_0 ), - ADD_TEST( unsigned_1 ), - - ADD_TEST( hexadecimal_0 ), - ADD_TEST( hexadecimal_1 ), - ADD_TEST( hexadecimal_2 ), - ADD_TEST( hexadecimal_3 ), - ADD_TEST( hexadecimal_4 ), - - ADD_TEST( char_0 ), - ADD_TEST( char_1 ), - ADD_TEST( char_2 ), - - ADD_TEST( string_0 ), - ADD_TEST( string_1 ), - ADD_TEST( string_2 ), - - ADD_TEST( vector_0 ), - ADD_TEST( vector_1 ), - ADD_TEST( vector_2 ), - ADD_TEST( vector_3 ), - ADD_TEST( vector_4 ), - - ADD_TEST( address_space_0 ), - ADD_TEST( address_space_1 ), - ADD_TEST( address_space_2 ), - ADD_TEST( address_space_3 ), - ADD_TEST( address_space_4 ), + ADD_TEST(int_0), ADD_TEST(int_1), + 
ADD_TEST(int_2), ADD_TEST(int_3), + ADD_TEST(int_4), ADD_TEST(int_5), + ADD_TEST(int_6), ADD_TEST(int_7), + ADD_TEST(int_8), + + ADD_TEST(float_0), ADD_TEST(float_1), + ADD_TEST(float_2), ADD_TEST(float_3), + ADD_TEST(float_4), ADD_TEST(float_5), + ADD_TEST(float_6), ADD_TEST(float_7), + ADD_TEST(float_8), ADD_TEST(float_9), + ADD_TEST(float_10), ADD_TEST(float_11), + ADD_TEST(float_12), ADD_TEST(float_13), + ADD_TEST(float_14), ADD_TEST(float_15), + ADD_TEST(float_16), ADD_TEST(float_17), + + ADD_TEST(float_limits_0), ADD_TEST(float_limits_1), + ADD_TEST(float_limits_2), + + ADD_TEST(octal_0), ADD_TEST(octal_1), + ADD_TEST(octal_2), ADD_TEST(octal_3), + + ADD_TEST(unsigned_0), ADD_TEST(unsigned_1), + + ADD_TEST(hexadecimal_0), ADD_TEST(hexadecimal_1), + ADD_TEST(hexadecimal_2), ADD_TEST(hexadecimal_3), + ADD_TEST(hexadecimal_4), + + ADD_TEST(char_0), ADD_TEST(char_1), + ADD_TEST(char_2), + + ADD_TEST(string_0), ADD_TEST(string_1), + ADD_TEST(string_2), + + ADD_TEST(vector_0), ADD_TEST(vector_1), + ADD_TEST(vector_2), ADD_TEST(vector_3), + ADD_TEST(vector_4), + + ADD_TEST(address_space_0), ADD_TEST(address_space_1), + ADD_TEST(address_space_2), ADD_TEST(address_space_3), + ADD_TEST(address_space_4), + + ADD_TEST(buffer_size), }; const int test_num = ARRAY_SIZE( test_list ); -- cgit v1.2.3 From ae217e8bd2de2ea7dc9a8d50574530a2a29e4be9 Mon Sep 17 00:00:00 2001 From: Jack Frankland <30410009+FranklandJack@users.noreply.github.com> Date: Wed, 2 Mar 2022 18:30:31 +0000 Subject: Check for non-uniform work-group support (#1383) Only run sub-group tests with non-uniform work-groups on OpenCL 3.0 and later if it is supported by the device. 
--- test_conformance/subgroups/test_workitem.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/test_conformance/subgroups/test_workitem.cpp b/test_conformance/subgroups/test_workitem.cpp index 7ffa6a7c..b69f3138 100644 --- a/test_conformance/subgroups/test_workitem.cpp +++ b/test_conformance/subgroups/test_workitem.cpp @@ -16,6 +16,7 @@ #include "procs.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" +#include struct get_test_data { @@ -251,8 +252,21 @@ int test_work_item_functions(cl_device_id device, cl_context context, global = local * 5; - // Make sure we have a flexible range - global += 3 * local / 4; + // Non-uniform work-groups are an optional feature from 3.0 onward. + cl_bool device_supports_non_uniform_wg = CL_TRUE; + if (get_device_cl_version(device) >= Version(3, 0)) + { + error = clGetDeviceInfo( + device, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, sizeof(cl_bool), + &device_supports_non_uniform_wg, nullptr); + test_error(error, "clGetDeviceInfo failed"); + } + + if (device_supports_non_uniform_wg) + { + // Make sure we have a flexible range + global += 3 * local / 4; + } // Collect the data memset((void *)&result, 0xf0, sizeof(result)); @@ -327,4 +341,4 @@ int test_work_item_functions_ext(cl_device_id device, cl_context context, return test_work_item_functions(device, context, queue, num_elements, false); -} \ No newline at end of file +} -- cgit v1.2.3 From 3c4a1a3ce6ddb8880e1be7a6a1bb525b28db6e7f Mon Sep 17 00:00:00 2001 From: Jim Lewis Date: Wed, 2 Mar 2022 12:31:14 -0600 Subject: Fix build error for linux with clang-8 (#1304) -Wabsolute-value warning reported as error (long double truncated to double) --- test_conformance/math_brute_force/reference_math.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index 0b037e01..16db3d67 100644 --- 
a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -4549,8 +4549,8 @@ long double reference_powl(long double x, long double y) if (x != x || y != y) return x + y; // do the work required to sort out edge cases - double fabsy = reference_fabs(y); - double fabsx = reference_fabs(x); + double fabsy = (double)reference_fabsl(y); + double fabsx = (double)reference_fabsl(x); double iy = reference_rint( fabsy); // we do round to nearest here so that |fy| <= 0.5 if (iy > fabsy) // convert nearbyint to floor @@ -4637,13 +4637,13 @@ long double reference_powl(long double x, long double y) // compute product of y*log2(x) // scale to avoid overflow in double-double multiplication - if (reference_fabs(y) > HEX_DBL(+, 1, 0, +, 970)) + if (fabsy > HEX_DBL(+, 1, 0, +, 970)) { y_hi = reference_ldexp(y_hi, -53); y_lo = reference_ldexp(y_lo, -53); } MulDD(&ylog2x_hi, &ylog2x_lo, log2x_hi, log2x_lo, y_hi, y_lo); - if (fabs(y) > HEX_DBL(+, 1, 0, +, 970)) + if (fabsy > HEX_DBL(+, 1, 0, +, 970)) { ylog2x_hi = reference_ldexp(ylog2x_hi, 53); ylog2x_lo = reference_ldexp(ylog2x_lo, 53); -- cgit v1.2.3 From bbc7ccfc58386bea759ef4fa2cd47888172ad76a Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Wed, 2 Mar 2022 10:34:06 -0800 Subject: add a prefix to OpenCL extension names (#1311) * add a prefix to OpenCL extension names * fix formatting --- test_conformance/computeinfo/main.cpp | 12 ++-- test_conformance/spir/run_services.cpp | 126 +++++++++++++++++---------------- test_conformance/spir/run_services.h | 59 +++++++-------- 3 files changed, 97 insertions(+), 100 deletions(-) diff --git a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp index d993655b..03bdb2c1 100644 --- a/test_conformance/computeinfo/main.cpp +++ b/test_conformance/computeinfo/main.cpp @@ -95,8 +95,8 @@ typedef struct _version version_t; struct _extensions { - int cl_khr_fp64; - int cl_khr_fp16; + int has_cl_khr_fp64; + int 
has_cl_khr_fp16; }; typedef struct _extensions extensions_t; @@ -1069,11 +1069,11 @@ int parseExtensions(char const* str, extensions_t* extensions) } if (strncmp(begin, "cl_khr_fp64", length) == 0) { - extensions->cl_khr_fp64 = 1; + extensions->has_cl_khr_fp64 = 1; } if (strncmp(begin, "cl_khr_fp16", length) == 0) { - extensions->cl_khr_fp16 = 1; + extensions->has_cl_khr_fp16 = 1; } begin += length; // Skip word. if (begin[0] == ' ') @@ -1112,13 +1112,13 @@ int getConfigInfos(cl_device_id device) // version 1.1, we have to check doubles are sopported. In // OpenCL 1.2 CL_DEVICE_DOUBLE_FP_CONFIG should be reported // unconditionally. - get = extensions.cl_khr_fp64; + get = extensions.has_cl_khr_fp64; }; if (info.opcode == CL_DEVICE_HALF_FP_CONFIG) { // CL_DEVICE_HALF_FP_CONFIG should be reported only when cl_khr_fp16 // extension is available - get = extensions.cl_khr_fp16; + get = extensions.has_cl_khr_fp16; }; if (get) { diff --git a/test_conformance/spir/run_services.cpp b/test_conformance/spir/run_services.cpp index 06fc418d..3162e16f 100644 --- a/test_conformance/spir/run_services.cpp +++ b/test_conformance/spir/run_services.cpp @@ -389,6 +389,7 @@ OclExtensions OclExtensions::getDeviceCapabilities(cl_device_id devId) { ret = ret | OclExtensions::fromString(*it); } + return ret; } @@ -399,75 +400,80 @@ OclExtensions OclExtensions::empty() OclExtensions OclExtensions::fromString(const std::string& e) { - std::string s = "OclExtensions::" + e; - RETURN_IF_ENUM(s, OclExtensions::cl_khr_int64_base_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_int64_extended_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_3d_image_writes); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_fp16); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_sharing); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_event); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_d3d10_sharing); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_dx9_media_sharing); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_d3d11_sharing); - 
RETURN_IF_ENUM(s, OclExtensions::cl_khr_depth_images); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_depth_images); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_gl_msaa_sharing); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_image2d_from_buffer); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_initialize_memory); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_spir); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_fp64); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_global_int32_base_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_global_int32_extended_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_local_int32_base_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_local_int32_extended_atomics); - RETURN_IF_ENUM(s, OclExtensions::cl_khr_byte_addressable_store); - RETURN_IF_ENUM(s, OclExtensions::cles_khr_int64); - RETURN_IF_ENUM(s, OclExtensions::cles_khr_2d_image_array_writes); + std::string s = "OclExtensions::has_" + e; + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_int64_base_atomics); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_int64_extended_atomics); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_3d_image_writes); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_fp16); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_sharing); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_event); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_d3d10_sharing); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_dx9_media_sharing); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_d3d11_sharing); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_depth_images); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_depth_images); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_gl_msaa_sharing); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_image2d_from_buffer); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_initialize_memory); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_spir); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_fp64); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_global_int32_base_atomics); + 
RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_global_int32_extended_atomics); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_local_int32_base_atomics); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_local_int32_extended_atomics); + RETURN_IF_ENUM(s, OclExtensions::has_cl_khr_byte_addressable_store); + RETURN_IF_ENUM(s, OclExtensions::has_cles_khr_int64); + RETURN_IF_ENUM(s, OclExtensions::has_cles_khr_2d_image_array_writes); // Unknown KHR string. return OclExtensions::empty(); } std::string OclExtensions::toString() { - - #define APPEND_STR_IF_SUPPORTS( STR, E) \ - if ( this->supports(E) ) \ - { \ - std::string ext_str( #E ); \ - std::string prefix = "OclExtensions::"; \ - size_t pos = ext_str.find( prefix ); \ - if ( pos != std::string::npos ) \ - { \ - ext_str.replace( pos, prefix.length(), ""); \ - } \ - STR += ext_str; \ - } +#define APPEND_STR_IF_SUPPORTS(STR, E) \ + if (this->supports(E)) \ + { \ + std::string ext_str(#E); \ + std::string prefix = "OclExtensions::has_"; \ + size_t pos = ext_str.find(prefix); \ + if (pos != std::string::npos) \ + { \ + ext_str.replace(pos, prefix.length(), ""); \ + } \ + STR += ext_str; \ + STR += " "; \ + } std::string s = ""; - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_int64_base_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_int64_extended_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_3d_image_writes ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_fp16 ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_sharing ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_event ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_d3d10_sharing ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_dx9_media_sharing ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_d3d11_sharing ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_depth_images ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_gl_depth_images ); - APPEND_STR_IF_SUPPORTS( s, 
OclExtensions::cl_khr_gl_msaa_sharing ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_image2d_from_buffer ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_initialize_memory ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_spir ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_fp64 ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_global_int32_base_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_global_int32_extended_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_local_int32_base_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_local_int32_extended_atomics ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cl_khr_byte_addressable_store ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cles_khr_int64 ); - APPEND_STR_IF_SUPPORTS( s, OclExtensions::cles_khr_2d_image_array_writes ); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_int64_base_atomics); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_int64_extended_atomics); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_3d_image_writes); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_fp16); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_sharing); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_event); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_d3d10_sharing); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_dx9_media_sharing); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_d3d11_sharing); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_depth_images); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_depth_images); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_gl_msaa_sharing); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_image2d_from_buffer); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_initialize_memory); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_spir); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_fp64); + APPEND_STR_IF_SUPPORTS(s, + 
OclExtensions::has_cl_khr_global_int32_base_atomics); + APPEND_STR_IF_SUPPORTS( + s, OclExtensions::has_cl_khr_global_int32_extended_atomics); + APPEND_STR_IF_SUPPORTS(s, + OclExtensions::has_cl_khr_local_int32_base_atomics); + APPEND_STR_IF_SUPPORTS( + s, OclExtensions::has_cl_khr_local_int32_extended_atomics); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cl_khr_byte_addressable_store); + APPEND_STR_IF_SUPPORTS(s, OclExtensions::has_cles_khr_int64); + APPEND_STR_IF_SUPPORTS(s, + OclExtensions::has_cles_khr_2d_image_array_writes); return s; } diff --git a/test_conformance/spir/run_services.h b/test_conformance/spir/run_services.h index 6bac4c91..10f0d05e 100644 --- a/test_conformance/spir/run_services.h +++ b/test_conformance/spir/run_services.h @@ -113,42 +113,33 @@ private: OclExtensions(size_t ext) : m_extVector(ext) {} -// Fix a compilation error, since cl_khr_gl_sharing is defined as a macro. -#ifdef cl_khr_gl_sharing -#undef cl_khr_gl_sharing -#endif//cl_khr_gl_sharing - -#ifdef cl_khr_icd -#undef cl_khr_icd -#endif//cl_khr_icd - enum ClKhrs { - no_extensions = KhrValue<0>::Mask, - cl_khr_int64_base_atomics = KhrValue<1>::Mask, - cl_khr_int64_extended_atomics = KhrValue<2>::Mask, - cl_khr_3d_image_writes = KhrValue<3>::Mask, - cl_khr_fp16 = KhrValue<4>::Mask, - cl_khr_gl_sharing = KhrValue<5>::Mask, - cl_khr_gl_event = KhrValue<6>::Mask, - cl_khr_d3d10_sharing = KhrValue<7>::Mask, - cl_khr_dx9_media_sharing = KhrValue<8>::Mask, - cl_khr_d3d11_sharing = KhrValue<9>::Mask, - cl_khr_depth_images = KhrValue<10>::Mask, - cl_khr_gl_depth_images = KhrValue<11>::Mask, - cl_khr_gl_msaa_sharing = KhrValue<12>::Mask, - cl_khr_image2d_from_buffer = KhrValue<13>::Mask, - cl_khr_initialize_memory = KhrValue<14>::Mask, - cl_khr_context_abort = KhrValue<15>::Mask, - cl_khr_spir = KhrValue<16>::Mask, - cl_khr_fp64 = KhrValue<17>::Mask, - cl_khr_global_int32_base_atomics = KhrValue<18>::Mask, - cl_khr_global_int32_extended_atomics = KhrValue<19>::Mask, - 
cl_khr_local_int32_base_atomics = KhrValue<20>::Mask, - cl_khr_local_int32_extended_atomics = KhrValue<21>::Mask, - cl_khr_byte_addressable_store = KhrValue<22>::Mask, - cles_khr_int64 = KhrValue<23>::Mask, - cles_khr_2d_image_array_writes = KhrValue<24>::Mask, + no_extensions = KhrValue<0>::Mask, + has_cl_khr_int64_base_atomics = KhrValue<1>::Mask, + has_cl_khr_int64_extended_atomics = KhrValue<2>::Mask, + has_cl_khr_3d_image_writes = KhrValue<3>::Mask, + has_cl_khr_fp16 = KhrValue<4>::Mask, + has_cl_khr_gl_sharing = KhrValue<5>::Mask, + has_cl_khr_gl_event = KhrValue<6>::Mask, + has_cl_khr_d3d10_sharing = KhrValue<7>::Mask, + has_cl_khr_dx9_media_sharing = KhrValue<8>::Mask, + has_cl_khr_d3d11_sharing = KhrValue<9>::Mask, + has_cl_khr_depth_images = KhrValue<10>::Mask, + has_cl_khr_gl_depth_images = KhrValue<11>::Mask, + has_cl_khr_gl_msaa_sharing = KhrValue<12>::Mask, + has_cl_khr_image2d_from_buffer = KhrValue<13>::Mask, + has_cl_khr_initialize_memory = KhrValue<14>::Mask, + has_cl_khr_context_abort = KhrValue<15>::Mask, + has_cl_khr_spir = KhrValue<16>::Mask, + has_cl_khr_fp64 = KhrValue<17>::Mask, + has_cl_khr_global_int32_base_atomics = KhrValue<18>::Mask, + has_cl_khr_global_int32_extended_atomics = KhrValue<19>::Mask, + has_cl_khr_local_int32_base_atomics = KhrValue<20>::Mask, + has_cl_khr_local_int32_extended_atomics = KhrValue<21>::Mask, + has_cl_khr_byte_addressable_store = KhrValue<22>::Mask, + has_cles_khr_int64 = KhrValue<23>::Mask, + has_cles_khr_2d_image_array_writes = KhrValue<24>::Mask, }; size_t m_extVector; -- cgit v1.2.3 From 0f4dc3166c9604b781d92a9acfd1fd13c4915846 Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Thu, 17 Mar 2022 14:27:24 -0600 Subject: conversions: Use volatile qualifier to prevent optimizations (#1399) Use volatile to prevent clang optimizations, fix int2float --- test_conformance/conversions/basic_test_conversions.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/test_conformance/conversions/basic_test_conversions.cpp b/test_conformance/conversions/basic_test_conversions.cpp index 32998841..3ee072da 100644 --- a/test_conformance/conversions/basic_test_conversions.cpp +++ b/test_conformance/conversions/basic_test_conversions.cpp @@ -696,7 +696,8 @@ static void int2short( void *out, void *in){ ((cl_short*) out)[0] = ((cl_int*) i static void int2uint( void *out, void *in){ ((cl_uint*) out)[0] = ((cl_int*) in)[0]; } static void int2float( void *out, void *in) { - cl_int l = ((cl_int*) in)[0]; + // Use volatile to prevent optimization by Clang compiler + volatile cl_int l = ((cl_int *)in)[0]; ((float*) out)[0] = (l == 0 ? 0.0f : (float) l); // Per IEEE-754-2008 5.4.1, 0's always convert to +0.0 } static void int2double( void *out, void *in) -- cgit v1.2.3 From f6dbc5b9b5321ae9d9dc177353e233d6d7964ec9 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 22 Mar 2022 16:21:09 +0000 Subject: Add cluster size handling in subgroup test helpers (#1394) Signed-off-by: Stuart Brady --- test_conformance/subgroups/subhelpers.h | 48 ++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index b88d2278..c73027dc 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -55,11 +55,12 @@ static cl_uint4 bs128_to_cl_uint4(bs128 v) struct WorkGroupParams { - WorkGroupParams(size_t gws, size_t lws, int dm_arg = -1) + WorkGroupParams(size_t gws, size_t lws, int dm_arg = -1, int cs_arg = -1) : global_workgroup_size(gws), local_workgroup_size(lws), - divergence_mask_arg(dm_arg) + divergence_mask_arg(dm_arg), cluster_size_arg(cs_arg) { subgroup_size = 0; + cluster_size = 0; work_items_mask = 0; use_core_subgroups = true; dynsc = 0; @@ -68,11 +69,13 @@ struct WorkGroupParams size_t global_workgroup_size; size_t local_workgroup_size; size_t subgroup_size; + cl_uint cluster_size; bs128 
work_items_mask; int dynsc; bool use_core_subgroups; std::vector all_work_item_masks; int divergence_mask_arg; + int cluster_size_arg; void save_kernel_source(const std::string &source, std::string name = "") { if (name == "") @@ -1421,7 +1424,9 @@ public: return error; } - test_status run_and_check(const WorkGroupParams &test_params) +private: + test_status + run_and_check_with_cluster_size(const WorkGroupParams &test_params) { cl_int error = run(); if (error != CL_SUCCESS) @@ -1444,6 +1449,35 @@ public: return status; } + +public: + test_status run_and_check(WorkGroupParams &test_params) + { + test_status tmp_status = TEST_SKIPPED_ITSELF; + + if (test_params.cluster_size_arg != -1) + { + for (cl_uint cluster_size = 1; + cluster_size <= test_params.subgroup_size; cluster_size *= 2) + { + test_params.cluster_size = cluster_size; + cl_int error = + clSetKernelArg(kernel, test_params.cluster_size_arg, + sizeof(cl_uint), &cluster_size); + test_error_fail(error, "Unable to set cluster size"); + + tmp_status = run_and_check_with_cluster_size(test_params); + + if (tmp_status == TEST_FAIL) break; + } + } + else + { + tmp_status = run_and_check_with_cluster_size(test_params); + } + + return tmp_status; + } }; // Driver for testing a single built in function @@ -1592,6 +1626,14 @@ template struct test test_error_fail(error, "Unable to set divergence mask argument"); } + if (test_params.cluster_size_arg != -1) + { + cl_uint dummy_cluster_size = 1; + error = clSetKernelArg(kernel, test_params.cluster_size_arg, + sizeof(cl_uint), &dummy_cluster_size); + test_error_fail(error, "Unable to set dummy cluster size"); + } + KernelExecutor executor( context, queue, kernel, global, local, idata.data(), input_array_size * sizeof(Ty), mapin.data(), mapout.data(), -- cgit v1.2.3 From c42cf518dabf79b19d795ca9ce1e41c848cf54e8 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 29 Mar 2022 19:39:06 +0100 Subject: Improve cl_khr_subgroup_shuffle* test coverage (#1402) Test cases where the 
index/mask/delta is greater than or equal to the maximum subgroup size. These are cases that return undefined results but are not undefined behavior. The index/mask/delta values now include values less than twice the subgroup size, and 0xffffffff. Testing for sub_group_shuffle_xor() already allowed inputs that were greater or equal to the subgroup size for the last subgroup in a workgroup, but did not properly account for this in the verification function, potentially resulting in out of bounds accesses. Signed-off-by: Stuart Brady --- .../subgroups/subgroup_common_templates.h | 78 +++++++++------------- 1 file changed, 33 insertions(+), 45 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 641c1875..0ffa46c8 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -481,12 +481,12 @@ template struct SHF static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - int i, ii, j, k, l, n, delta; + int i, ii, j, k, n, delta; + cl_uint l; int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; int nj = (nw + ns - 1) / ns; - int d = ns > 100 ? 100 : ns; ii = 0; ng = ng / nw; for (k = 0; k < ng; ++k) @@ -498,33 +498,10 @@ template struct SHF for (i = 0; i < n; ++i) { int midx = 4 * ii + 4 * i + 2; - l = (int)(genrand_int32(gMTdata) & 0x7fffffff) - % (d > n ? 
n : d); - switch (operation) - { - case ShuffleOp::shuffle: - case ShuffleOp::shuffle_xor: - // storing information about shuffle index - m[midx] = (cl_int)l; - break; - case ShuffleOp::shuffle_up: - delta = l; // calculate delta for shuffle up - if (i - delta < 0) - { - delta = i; - } - m[midx] = (cl_int)delta; - break; - case ShuffleOp::shuffle_down: - delta = l; // calculate delta for shuffle down - if (i + delta >= n) - { - delta = n - 1 - i; - } - m[midx] = (cl_int)delta; - break; - default: break; - } + l = (((cl_uint)(genrand_int32(gMTdata) & 0x7fffffff) + 1) + % (ns * 2 + 1)) + - 1; + m[midx] = l; cl_ulong number = genrand_int64(gMTdata); set_value(t[ii + i], number); } @@ -542,7 +519,8 @@ template struct SHF static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, const WorkGroupParams &test_params) { - int ii, i, j, k, l, n; + int ii, i, j, k, n; + cl_uint l; int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; int ng = test_params.global_workgroup_size; @@ -567,32 +545,42 @@ template struct SHF { // inside the subgroup // shuffle index storage int midx = 4 * ii + 4 * i + 2; - l = (int)m[midx]; + l = m[midx]; rr = my[ii + i]; + cl_uint tr_idx; + bool skip = false; switch (operation) { // shuffle basic - treat l as index - case ShuffleOp::shuffle: tr = mx[ii + l]; break; - // shuffle up - treat l as delta - case ShuffleOp::shuffle_up: tr = mx[ii + i - l]; break; + case ShuffleOp::shuffle: tr_idx = l; break; + // shuffle xor - treat l as mask + case ShuffleOp::shuffle_xor: tr_idx = i ^ l; break; // shuffle up - treat l as delta - case ShuffleOp::shuffle_down: - tr = mx[ii + i + l]; + case ShuffleOp::shuffle_up: + if (l >= ns) skip = true; + tr_idx = i - l; break; - // shuffle xor - treat l as mask - case ShuffleOp::shuffle_xor: - tr = mx[ii + (i ^ l)]; + // shuffle down - treat l as delta + case ShuffleOp::shuffle_down: + if (l >= ns) skip = true; + tr_idx = i + l; break; default: break; } - if (!compare(rr, tr)) + if (!skip 
&& tr_idx < n) { - log_error("ERROR: sub_group_%s(%s) mismatch for " - "local id %d in sub group %d in group %d\n", - operation_names(operation), - TypeManager::name(), i, j, k); - return TEST_FAIL; + tr = mx[ii + tr_idx]; + + if (!compare(rr, tr)) + { + log_error("ERROR: sub_group_%s(%s) mismatch for " + "local id %d in sub group %d in group " + "%d\n", + operation_names(operation), + TypeManager::name(), i, j, k); + return TEST_FAIL; + } } } } -- cgit v1.2.3 From 93f4f6a54842b0080ec7bb562e0324f735487a36 Mon Sep 17 00:00:00 2001 From: Jason Tang Date: Tue, 29 Mar 2022 19:08:08 -0400 Subject: test_api_min_max.cpp: use size_t for get_global_id() value (#1410) In some rare cases where get_global_id() is larger than 2G, the 32bit int type would convert the value into a negative integer. --- test_conformance/api/test_api_min_max.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index 28ca8237..9e08b16d 100644 --- a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -22,19 +22,11 @@ const char *sample_single_param_kernel[] = { "__kernel void sample_test(__global int *src)\n" "{\n" - " int tid = get_global_id(0);\n" + " size_t tid = get_global_id(0);\n" "\n" "}\n" }; -const char *sample_single_param_write_kernel[] = { - "__kernel void sample_test(__global int *src)\n" - "{\n" - " int tid = get_global_id(0);\n" - " src[tid] = tid;\n" - "\n" - "}\n" -}; const char *sample_read_image_kernel_pattern[] = { "__kernel void sample_test( __global float *result, ", @@ -42,7 +34,7 @@ const char *sample_read_image_kernel_pattern[] = { "{\n" " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | " "CLK_FILTER_NEAREST;\n" - " int tid = get_global_id(0);\n" + " size_t tid = get_global_id(0);\n" " result[0] = 0.0f;\n", "\n" "}\n" @@ -52,7 +44,7 @@ const char *sample_write_image_kernel_pattern[] = { 
"__kernel void sample_test( ", " )\n" "{\n" - " int tid = get_global_id(0);\n", + " size_t tid = get_global_id(0);\n", "\n" "}\n" }; @@ -81,8 +73,8 @@ const char *sample_sampler_kernel_pattern[] = { ", sampler_t sampler%d", ")\n" "{\n" - " int tid = get_global_id(0);\n", - " dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n", + " size_t tid = get_global_id(0);\n", + " dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n", "\n" "}\n" }; @@ -90,7 +82,7 @@ const char *sample_sampler_kernel_pattern[] = { const char *sample_const_arg_kernel[] = { "__kernel void sample_test(__constant int *src1, __global int *dst)\n" "{\n" - " int tid = get_global_id(0);\n" + " size_t tid = get_global_id(0);\n" "\n" " dst[tid] = src1[tid];\n" "\n" @@ -101,7 +93,7 @@ const char *sample_local_arg_kernel[] = { "__kernel void sample_test(__local int *src1, __global int *global_src, " "__global int *dst)\n" "{\n" - " int tid = get_global_id(0);\n" + " size_t tid = get_global_id(0);\n" "\n" " src1[tid] = global_src[tid];\n" " barrier(CLK_GLOBAL_MEM_FENCE);\n" -- cgit v1.2.3 From e121b9d1bf0380a9d0468686e79ac1b4057857b8 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Mon, 4 Apr 2022 17:57:36 +0100 Subject: Fix sub_group_ballot_find_msb/lsb tests (#1411) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As per the OpenCL Extension Specification § 38.6 Ballots: If no bits representing predicate values from all work items in the subgroup are set in the bitfield value then the return value is undefined. The case with no bits set is still worth testing, as it does not result in undefined behavior, but only an undefined return value. 
Signed-off-by: Stuart Brady --- test_conformance/subgroups/test_subgroup_ballot.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index 4148707e..b1e6944f 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -609,6 +609,12 @@ template struct BALLOT_COUNT_SCAN_FIND } else if (operation == BallotOp::ballot_find_lsb) { + if (bs.none()) + { + // Return value is undefined when no bits are set, + // so skip validation: + continue; + } for (int id = 0; id < sbs; ++id) { if (bs.test(id)) @@ -630,6 +636,12 @@ template struct BALLOT_COUNT_SCAN_FIND } else if (operation == BallotOp::ballot_find_msb) { + if (bs.none()) + { + // Return value is undefined when no bits are set, + // so skip validation: + continue; + } for (int id = sbs - 1; id >= 0; --id) { if (bs.test(id)) -- cgit v1.2.3 From 2fcdde96d246cd405ec4fc97fb90eb235ba9fd1e Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Mon, 4 Apr 2022 13:19:30 -0700 Subject: refactor work group scan and reduction tests (#1401) * updated reduce test * switched all reduce tests to new framework * switch over scans to new framework * remove old files * minor fixes * add type type name to the kernel name * fix Windows build and warnings * address review comments --- test_conformance/workgroups/CMakeLists.txt | 10 +- test_conformance/workgroups/test_wg_reduce.cpp | 596 ------------------- test_conformance/workgroups/test_wg_reduce_max.cpp | 632 -------------------- test_conformance/workgroups/test_wg_reduce_min.cpp | 632 -------------------- .../workgroups/test_wg_scan_exclusive_add.cpp | 604 -------------------- .../workgroups/test_wg_scan_exclusive_max.cpp | 632 -------------------- .../workgroups/test_wg_scan_exclusive_min.cpp | 633 --------------------- .../workgroups/test_wg_scan_inclusive_add.cpp | 593 ------------------- 
.../workgroups/test_wg_scan_inclusive_max.cpp | 597 ------------------- .../workgroups/test_wg_scan_inclusive_min.cpp | 597 ------------------- .../workgroups/test_wg_scan_reduce.cpp | 456 +++++++++++++++ 11 files changed, 457 insertions(+), 5525 deletions(-) delete mode 100644 test_conformance/workgroups/test_wg_reduce.cpp delete mode 100644 test_conformance/workgroups/test_wg_reduce_max.cpp delete mode 100644 test_conformance/workgroups/test_wg_reduce_min.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_exclusive_add.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_exclusive_max.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_exclusive_min.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_inclusive_add.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_inclusive_max.cpp delete mode 100644 test_conformance/workgroups/test_wg_scan_inclusive_min.cpp create mode 100644 test_conformance/workgroups/test_wg_scan_reduce.cpp diff --git a/test_conformance/workgroups/CMakeLists.txt b/test_conformance/workgroups/CMakeLists.txt index c90bef88..0c004b32 100644 --- a/test_conformance/workgroups/CMakeLists.txt +++ b/test_conformance/workgroups/CMakeLists.txt @@ -5,15 +5,7 @@ set(${MODULE_NAME}_SOURCES test_wg_all.cpp test_wg_any.cpp test_wg_broadcast.cpp - test_wg_reduce.cpp - test_wg_reduce_max.cpp - test_wg_reduce_min.cpp - test_wg_scan_exclusive_add.cpp - test_wg_scan_exclusive_min.cpp - test_wg_scan_exclusive_max.cpp - test_wg_scan_inclusive_add.cpp - test_wg_scan_inclusive_min.cpp - test_wg_scan_inclusive_max.cpp + test_wg_scan_reduce.cpp test_wg_suggested_local_work_size.cpp ) diff --git a/test_conformance/workgroups/test_wg_reduce.cpp b/test_conformance/workgroups/test_wg_reduce.cpp deleted file mode 100644 index eb26f498..00000000 --- a/test_conformance/workgroups/test_wg_reduce.cpp +++ /dev/null @@ -1,596 +0,0 @@ -// -// Copyright (c) 2017 The Khronos Group Inc. 
-// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -#include "harness/compat.h" - -#include -#include -#include -#include - -#include "procs.h" - - -const char *wg_reduce_add_kernel_code_int = -"__kernel void test_wg_reduce_add_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_reduce_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_add_kernel_code_uint = -"__kernel void test_wg_reduce_add_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_reduce_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_reduce_add_kernel_code_long = -"__kernel void test_wg_reduce_add_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_reduce_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_add_kernel_code_ulong = -"__kernel void test_wg_reduce_add_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_reduce_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_reduce_add_int(int *inptr, int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - sum += inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? 
wg_size : (n-i)); j++) - { - if ( sum != outptr[i+j] ) - { - log_info("work_group_reduce_add int: Error at %u: expected = %d, got = %d\n", i+j, sum, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_add_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - sum += inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( sum != outptr[i+j] ) - { - log_info("work_group_reduce_add uint: Error at %u: expected = %d, got = %d\n", i+j, sum, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_add_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - sum += inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( sum != outptr[i+j] ) - { - log_info("work_group_reduce_add long: Error at %u: expected = %lld, got = %lld\n", i+j, sum, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_add_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - sum += inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? 
wg_size : (n-i)); j++) - { - if ( sum != outptr[i+j] ) - { - log_info("work_group_reduce_add ulong: Error at %u: expected = %llu, got = %llu\n", i+j, sum, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - - - -int -test_work_group_reduce_add_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_reduce_add_kernel_code_int, - "test_wg_reduce_add_int"); - if (err) - return -1; - - // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. - err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); - test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - - -const char *wg_reduce_max_kernel_code_int = -"__kernel void test_wg_reduce_max_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_reduce_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_max_kernel_code_uint = -"__kernel void test_wg_reduce_max_uint(global uint *input, global 
uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_reduce_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_reduce_max_kernel_code_long = -"__kernel void test_wg_reduce_max_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_reduce_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_max_kernel_code_ulong = -"__kernel void test_wg_reduce_max_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_reduce_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_reduce_max_int(int *inptr, int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - max = (max > inptr[i+j]) ? max : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( max != outptr[i+j] ) - { - log_info("work_group_reduce_max int: Error at %u: expected = %d, got = %d\n", i+j, max, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - max = (max > inptr[i+j]) ? max : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( max != outptr[i+j] ) - { - log_info("work_group_reduce_max uint: Error at %u: expected = %d, got = %d\n", i+j, max, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - max = (max > inptr[i+j]) ? max : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? 
wg_size : (n-i)); j++) - { - if ( max != outptr[i+j] ) - { - log_info("work_group_reduce_max long: Error at %u: expected = %lld, got = %lld\n", i+j, max, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - max = (max > inptr[i+j]) ? max : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( max != outptr[i+j] ) - { - log_info("work_group_reduce_max ulong: Error at %u: expected = %llu, got = %llu\n", i+j, max, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - - - -int -test_work_group_reduce_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t wg_sizes_per_dimension[3]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_reduce_max_kernel_code_int, - "test_wg_reduce_max_int"); - if (err) - return -1; - - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); - if (err) - return -1; - if(wg_sizes_per_dimension[0] < wg_size[0]) - { - wg_size[0] = wg_sizes_per_dimension[0]; - } - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - 
sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - - -const char *wg_reduce_min_kernel_code_int = -"__kernel void test_wg_reduce_min_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_reduce_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_min_kernel_code_uint = -"__kernel void test_wg_reduce_min_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_reduce_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_reduce_min_kernel_code_long = -"__kernel void test_wg_reduce_min_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_reduce_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_reduce_min_kernel_code_ulong = -"__kernel void test_wg_reduce_min_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_reduce_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_reduce_min_int(int *inptr, int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - min = (min < inptr[i+j]) ? min : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( min != outptr[i+j] ) - { - log_info("work_group_reduce_min int: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - min = (min < inptr[i+j]) ? 
min : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( min != outptr[i+j] ) - { - log_info("work_group_reduce_min uint: Error at %u: expected = %d, got = %d\n", i+j, min, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - min = (min < inptr[i+j]) ? min : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( min != outptr[i+j] ) - { - log_info("work_group_reduce_min long: Error at %u: expected = %lld, got = %lld\n", i+j, min, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_reduce_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) -{ - size_t i, j; - - for (i=0; i wg_size ? wg_size : (n-i)); j++) - min = (min < inptr[i+j]) ? min : inptr[i+j]; - - for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++) - { - if ( min != outptr[i+j] ) - { - log_info("work_group_reduce_min ulong: Error at %u: expected = %llu, got = %llu\n", i+j, min, outptr[i+j]); - return -1; - } - } - } - - return 0; -} - - - -int -test_work_group_reduce_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t wg_sizes_per_dimension[3]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_reduce_min_kernel_code_int, - "test_wg_reduce_min_int"); - if (err) - return -1; - - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); - if (err) - return 
-1; - if(wg_sizes_per_dimension[0] < wg_size[0]) - { - wg_size[0] = wg_sizes_per_dimension[0]; - } - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - - -const char *wg_scan_exclusive_add_kernel_code_int = -"__kernel void test_wg_scan_exclusive_add_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_exclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_add_kernel_code_uint = -"__kernel void test_wg_scan_exclusive_add_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_exclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_exclusive_add_kernel_code_long = -"__kernel void test_wg_scan_exclusive_add_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_exclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_add_kernel_code_ulong = -"__kernel void test_wg_scan_exclusive_add_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_exclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_scan_exclusive_add_int(int *inptr, int 
*outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - int s, lasts; - - - - for (j = 0; j < n; j += wg_size) { - m = n - j; - if (m > wg_size) m = wg_size; - - s = 0; - lasts = 0; - for (i = 0; i < m; ++i) { - s += inptr[j + i]; - if (outptr[j + i] != lasts) { - log_info("work_group_scan_exclusive_add int: Error at %u: expected = %d, got = %d\n", - (unsigned int)(j + i), lasts, outptr[j + i]); - return -1; - } - lasts = s; - } - } - return 0; -} - -static int -verify_wg_scan_exclusive_add_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - unsigned int s, lasts; - - for (j = 0; j < n; j += wg_size) { - m = n - j; - if (m > wg_size) m = wg_size; - s = 0; - lasts = 0; - for (i = 0; i < m; ++i) { - s += inptr[j + i]; - if (outptr[j + i] != lasts) { - log_info("work_group_scan_exclusive_add uint: Error at %u: expected = %u, got = %u\n", - (unsigned int)(j + i), lasts, outptr[j + i]); - return -1; - } - lasts = s; - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_add_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - cl_long s, lasts; - - for (j = 0; j < n; j += wg_size) { - m = n - j; - if (m > wg_size) m = wg_size; - s = 0; - - lasts = 0; - for (i = 0; i < m; ++i) { - s += inptr[j + i]; - - if (outptr[j + i] != lasts) { - log_info("work_group_scan_exclusive_add long: Error at %u: expected = %lld, got = %lld\n", - (unsigned int)(j + i), (long long)lasts, (long long)outptr[j + i]); - return -1; - } - lasts = s; - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_add_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - cl_ulong s, lasts; - - for (j = 0; j < n; j += wg_size) { - m = n - j; - if (m > wg_size) m = wg_size; - - s = 0; - lasts = 0; - for (i = 0; i < m; ++i) { - s += inptr[j + i]; - if (outptr[j + i] != lasts) { - log_info("work_group_scan_exclusive_add ulong: Error at %u: expected = %llu, got = %llu\n", - 
(unsigned int)(j + i), (unsigned long long)lasts, (unsigned long long)outptr[j + i]); - return -1; - } - lasts = s; - } - } - return 0; -} - - -int -test_work_group_scan_exclusive_add_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_scan_exclusive_add_kernel_code_int, - "test_wg_scan_exclusive_add_int"); - if (err) - return -1; - - // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. - err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); - test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include - -#include "procs.h" - -const char *wg_scan_exclusive_max_kernel_code_int = -"__kernel void test_wg_scan_exclusive_max_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_exclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_max_kernel_code_uint = -"__kernel void test_wg_scan_exclusive_max_uint(global uint *input, global 
uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_exclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_exclusive_max_kernel_code_long = -"__kernel void test_wg_scan_exclusive_max_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_exclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_max_kernel_code_ulong = -"__kernel void test_wg_scan_exclusive_max_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_exclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_scan_exclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != max_) { - log_info("work_group_scan_exclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - max_ = std::max(inptr[j + i], max_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != max_) { - log_info("work_group_scan_exclusive_max int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - max_ = std::max(inptr[j + i], max_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != max_) { - log_info("work_group_scan_exclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, 
outptr[j+i]); - return -1; - } - max_ = std::max(inptr[j + i], max_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != max_) { - log_info("work_group_scan_exclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - max_ = std::max(inptr[j + i], max_); - } - } - - return 0; -} - - -int -test_work_group_scan_exclusive_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t wg_sizes_per_dimension[3]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_scan_exclusive_max_kernel_code_int, - "test_wg_scan_exclusive_max_int"); - if (err) - return -1; - - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); - if (err) - return -1; - if(wg_sizes_per_dimension[0] < wg_size[0]) - { - wg_size[0] = wg_sizes_per_dimension[0]; - } - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return 
-1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include - -#include "procs.h" - -const char *wg_scan_exclusive_min_kernel_code_int = -"__kernel void test_wg_scan_exclusive_min_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_exclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_min_kernel_code_uint = -"__kernel void test_wg_scan_exclusive_min_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_exclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_exclusive_min_kernel_code_long = -"__kernel void test_wg_scan_exclusive_min_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_exclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_exclusive_min_kernel_code_ulong = -"__kernel void test_wg_scan_exclusive_min_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_exclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - - -static int -verify_wg_scan_exclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != min_) { - log_info("work_group_scan_exclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - min_ = std::min(inptr[j + i], min_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != min_) { - 
log_info("work_group_scan_exclusive_min int: Error at %u: expected = %u, got = %u\n", j+i, min_, outptr[j+i]); - return -1; - } - min_ = std::min(inptr[j + i], min_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != min_) { - log_info("work_group_scan_exclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - min_ = std::min(inptr[j + i], min_); - } - } - - return 0; -} - -static int -verify_wg_scan_exclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - if (outptr[j+i] != min_) { - log_info("work_group_scan_exclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - min_ = std::min(inptr[j + i], min_); - } - } - - return 0; -} - - -int -test_work_group_scan_exclusive_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t wg_sizes_per_dimension[3]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_scan_exclusive_min_kernel_code_int, - "test_wg_scan_exclusive_min_int"); - if (err) - return -1; - - err = clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), wg_size, NULL); - if (err) - return -1; - - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * 3, wg_sizes_per_dimension, NULL); - if (err) - return -1; - if(wg_sizes_per_dimension[0] < wg_size[0]) - { - wg_size[0] = 
wg_sizes_per_dimension[0]; - } - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include "procs.h" - - -const char *wg_scan_inclusive_add_kernel_code_int = -"__kernel void test_wg_scan_inclusive_add_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_inclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_add_kernel_code_uint = -"__kernel void test_wg_scan_inclusive_add_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_inclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_inclusive_add_kernel_code_long = -"__kernel void test_wg_scan_inclusive_add_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_inclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_add_kernel_code_ulong = -"__kernel void test_wg_scan_inclusive_add_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_inclusive_add(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_scan_inclusive_add_int(int *inptr, int *outptr, size_t n, size_t wg_size) -{ - size_t i, j, m; - int s; - - 
for (j=0; j wg_size) - m = wg_size; - - s = 0; - for (i=0; i wg_size) - m = wg_size; - - s = 0; - for (i=0; i wg_size) - m = wg_size; - - s = 0; - for (i=0; i wg_size) - m = wg_size; - - s = 0; - for (i=0; i -#include -#include -#include - -#include - -#include "procs.h" - - -const char *wg_scan_inclusive_max_kernel_code_int = -"__kernel void test_wg_scan_inclusive_max_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_inclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_max_kernel_code_uint = -"__kernel void test_wg_scan_inclusive_max_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_inclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_inclusive_max_kernel_code_long = -"__kernel void test_wg_scan_inclusive_max_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_inclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_max_kernel_code_ulong = -"__kernel void test_wg_scan_inclusive_max_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_inclusive_max(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -static int -verify_wg_scan_inclusive_max_int(int *inptr, int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - max_ = std::max(inptr[j + i], max_); - if (outptr[j+i] != max_) { - log_info("work_group_scan_inclusive_max int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_max_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t 
wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - max_ = std::max(inptr[j + i], max_); - if (outptr[j+i] != max_) { - log_info("work_group_scan_inclusive_max int: Error at %lu: expected = %u, got = %u\n", (unsigned long)(j+i), max_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_max_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - max_ = std::max(inptr[j + i], max_); - if (outptr[j+i] != max_) { - log_info("work_group_scan_inclusive_max long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_max_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - max_ = std::max(inptr[j + i], max_); - if (outptr[j+i] != max_) { - log_info("work_group_scan_inclusive_max ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), max_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - - -int -test_work_group_scan_inclusive_max_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_scan_inclusive_max_kernel_code_int, - "test_wg_scan_inclusive_max_int"); - if (err) - return -1; - - // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. 
- err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); - test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i -#include -#include -#include - -#include - -#include "procs.h" - - -const char *wg_scan_inclusive_min_kernel_code_int = -"__kernel void test_wg_scan_inclusive_min_int(global int *input, global int *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" int result = work_group_scan_inclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_min_kernel_code_uint = -"__kernel void test_wg_scan_inclusive_min_uint(global uint *input, global uint *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" uint result = work_group_scan_inclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - -const char *wg_scan_inclusive_min_kernel_code_long = -"__kernel void test_wg_scan_inclusive_min_long(global long *input, global long *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" long result = work_group_scan_inclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - - -const char *wg_scan_inclusive_min_kernel_code_ulong = -"__kernel void test_wg_scan_inclusive_min_ulong(global ulong *input, global ulong *output)\n" -"{\n" -" int tid = get_global_id(0);\n" -"\n" -" ulong result = work_group_scan_inclusive_min(input[tid]);\n" -" output[tid] = result;\n" -"}\n"; - 
- -static int -verify_wg_scan_inclusive_min_int(int *inptr, int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - min_ = std::min(inptr[j + i], min_); - if (outptr[j+i] != min_) { - log_info("work_group_scan_inclusive_min int: Error at %u: expected = %d, got = %d\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_min_uint(unsigned int *inptr, unsigned int *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - min_ = std::min(inptr[j + i], min_); - if (outptr[j+i] != min_) { - log_info("work_group_scan_inclusive_min int: Error at %u: expected = %u, got = %u\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_min_long(cl_long *inptr, cl_long *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - min_ = std::min(inptr[j + i], min_); - if (outptr[j+i] != min_) { - log_info("work_group_scan_inclusive_min long: Error at %u: expected = %lld, got = %lld\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - -static int -verify_wg_scan_inclusive_min_ulong(cl_ulong *inptr, cl_ulong *outptr, size_t n, size_t wg_size) { - - size_t i, j, m; - - for (j=0; j wg_size) - m = wg_size; - - for (i = 0; i < m; ++i) { - min_ = std::min(inptr[j + i], min_); - if (outptr[j+i] != min_) { - log_info("work_group_scan_inclusive_min ulong: Error at %u: expected = %llu, got = %llu\n", (unsigned int)(j+i), min_, outptr[j+i]); - return -1; - } - } - } - - return 0; -} - - -int -test_work_group_scan_inclusive_min_int(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) -{ - cl_mem streams[2]; - cl_int *input_ptr[1], *p; - cl_int *output_ptr; - cl_program program; - 
cl_kernel kernel; - void *values[2]; - size_t threads[1]; - size_t wg_size[1]; - size_t num_elements; - int err; - int i; - MTdata d; - - err = create_single_kernel_helper(context, &program, &kernel, 1, - &wg_scan_inclusive_min_kernel_code_int, - "test_wg_scan_inclusive_min_int"); - if (err) - return -1; - - // "wg_size" is limited to that of the first dimension as only a 1DRange is executed. - err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); - test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); - - num_elements = n_elems; - - input_ptr[0] = (cl_int*)malloc(sizeof(cl_int) * num_elements); - output_ptr = (cl_int*)malloc(sizeof(cl_int) * num_elements); - streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[0]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, - sizeof(cl_int) * num_elements, NULL, NULL); - if (!streams[1]) - { - log_error("clCreateBuffer failed\n"); - return -1; - } - - p = input_ptr[0]; - d = init_genrand( gRandomSeed ); - for (i=0; i +#include +#include + +#include "procs.h" + +static std::string make_kernel_string(const std::string &type, + const std::string &kernelName, + const std::string &func) +{ + // Build a kernel string of the form: + // __kernel void KERNEL_NAME(global TYPE *input, global TYPE *output) { + // int tid = get_global_id(0); + // output[tid] = FUNC(input[tid]); + // } + + std::ostringstream os; + os << "__kernel void " << kernelName << "(global " << type + << " *input, global " << type << " *output) {\n"; + os << " int tid = get_global_id(0);\n"; + os << " output[tid] = " << func << "(input[tid]);\n"; + os << "}\n"; + return os.str(); +} + +template struct TestTypeInfo +{ +}; + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "int"; +}; + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = 
"uint"; +}; + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "long"; +}; + +template <> struct TestTypeInfo +{ + static constexpr const char *deviceName = "ulong"; +}; + +template struct Add +{ + using Type = T; + static constexpr const char *opName = "add"; + static constexpr T identityValue = 0; + static T combine(T a, T b) { return a + b; } +}; + +template struct Max +{ + using Type = T; + static constexpr const char *opName = "max"; + static constexpr T identityValue = std::numeric_limits::min(); + static T combine(T a, T b) { return std::max(a, b); } +}; + +template struct Min +{ + using Type = T; + static constexpr const char *opName = "min"; + static constexpr T identityValue = std::numeric_limits::max(); + static T combine(T a, T b) { return std::min(a, b); } +}; + +template struct Reduce +{ + using Type = typename C::Type; + + static constexpr const char *testName = "work_group_reduce"; + static constexpr const char *testOpName = C::opName; + static constexpr const char *deviceTypeName = + TestTypeInfo::deviceName; + static constexpr const char *kernelName = "test_wg_reduce"; + static int verify(Type *inptr, Type *outptr, size_t n_elems, + size_t max_wg_size) + { + for (size_t i = 0; i < n_elems; i += max_wg_size) + { + size_t wg_size = std::min(max_wg_size, n_elems - i); + + Type result = C::identityValue; + for (size_t j = 0; j < wg_size; j++) + { + result = C::combine(result, inptr[i + j]); + } + + for (size_t j = 0; j < wg_size; j++) + { + if (result != outptr[i + j]) + { + log_info("%s_%s: Error at %zu\n", testName, testOpName, + i + j); + return -1; + } + } + } + return 0; + } +}; + +template struct ScanInclusive +{ + using Type = typename C::Type; + + static constexpr const char *testName = "work_group_scan_inclusive"; + static constexpr const char *testOpName = C::opName; + static constexpr const char *deviceTypeName = + TestTypeInfo::deviceName; + static constexpr const char *kernelName = "test_wg_scan_inclusive"; 
+ static int verify(Type *inptr, Type *outptr, size_t n_elems, + size_t max_wg_size) + { + for (size_t i = 0; i < n_elems; i += max_wg_size) + { + size_t wg_size = std::min(max_wg_size, n_elems - i); + + Type result = C::identityValue; + for (size_t j = 0; j < wg_size; ++j) + { + result = C::combine(result, inptr[i + j]); + if (result != outptr[i + j]) + { + log_info("%s_%s: Error at %zu\n", testName, testOpName, + i + j); + return -1; + } + } + } + return 0; + } +}; + +template struct ScanExclusive +{ + using Type = typename C::Type; + + static constexpr const char *testName = "work_group_scan_exclusive"; + static constexpr const char *testOpName = C::opName; + static constexpr const char *deviceTypeName = + TestTypeInfo::deviceName; + static constexpr const char *kernelName = "test_wg_scan_exclusive"; + static int verify(Type *inptr, Type *outptr, size_t n_elems, + size_t max_wg_size) + { + for (size_t i = 0; i < n_elems; i += max_wg_size) + { + size_t wg_size = std::min(max_wg_size, n_elems - i); + + Type result = C::identityValue; + for (size_t j = 0; j < wg_size; ++j) + { + if (result != outptr[i + j]) + { + log_info("%s_%s: Error at %zu\n", testName, testOpName, + i + j); + return -1; + } + result = C::combine(result, inptr[i + j]); + } + } + return 0; + } +}; + +template +static int run_test(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + using T = typename TestInfo::Type; + + cl_int err = CL_SUCCESS; + + clProgramWrapper program; + clKernelWrapper kernel; + + std::string funcName = TestInfo::testName; + funcName += "_"; + funcName += TestInfo::testOpName; + + std::string kernelName = TestInfo::kernelName; + kernelName += "_"; + kernelName += TestInfo::testOpName; + kernelName += "_"; + kernelName += TestInfo::deviceTypeName; + + std::string kernelString = + make_kernel_string(TestInfo::deviceTypeName, kernelName, funcName); + + const char *kernel_source = kernelString.c_str(); + err = create_single_kernel_helper(context, 
&program, &kernel, 1, + &kernel_source, kernelName.c_str()); + test_error(err, "Unable to create test kernel"); + + size_t wg_size[1]; + err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size); + test_error(err, "get_max_allowed_1d_work_group_size_on_device failed"); + + clMemWrapper src = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * n_elems, NULL, &err); + test_error(err, "Unable to create source buffer"); + + clMemWrapper dst = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(T) * n_elems, NULL, &err); + test_error(err, "Unable to create destination buffer"); + + std::vector input_ptr(n_elems); + + MTdataHolder d(gRandomSeed); + for (int i = 0; i < n_elems; i++) + { + input_ptr[i] = (T)genrand_int64(d); + } + + err = clEnqueueWriteBuffer(queue, src, CL_TRUE, 0, sizeof(T) * n_elems, + input_ptr.data(), 0, NULL, NULL); + test_error(err, "clWriteBuffer to initialize src buffer failed"); + + err = clSetKernelArg(kernel, 0, sizeof(src), &src); + test_error(err, "Unable to set src buffer kernel arg"); + err |= clSetKernelArg(kernel, 1, sizeof(dst), &dst); + test_error(err, "Unable to set dst buffer kernel arg"); + + size_t global_work_size[] = { (size_t)n_elems }; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, + wg_size, 0, NULL, NULL); + test_error(err, "Unable to enqueue test kernel"); + + std::vector output_ptr(n_elems); + + cl_uint dead = 0xdeaddead; + memset_pattern4(output_ptr.data(), &dead, sizeof(T) * n_elems); + err = clEnqueueReadBuffer(queue, dst, CL_TRUE, 0, sizeof(T) * n_elems, + output_ptr.data(), 0, NULL, NULL); + test_error(err, "clEnqueueReadBuffer to read read dst buffer failed"); + + if (TestInfo::verify(input_ptr.data(), output_ptr.data(), n_elems, + wg_size[0])) + { + log_error("%s_%s %s failed\n", TestInfo::testName, TestInfo::testOpName, + TestInfo::deviceTypeName); + return TEST_FAIL; + } + + log_info("%s_%s %s passed\n", TestInfo::testName, TestInfo::testOpName, + 
TestInfo::deviceTypeName); + return TEST_PASS; +} + +int test_work_group_reduce_add(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= run_test>>(device, context, queue, n_elems); + result |= run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + } + + return result; +} + +int test_work_group_reduce_max(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= run_test>>(device, context, queue, n_elems); + result |= run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + } + + return result; +} + +int test_work_group_reduce_min(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= run_test>>(device, context, queue, n_elems); + result |= run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + } + + return result; +} + +int test_work_group_scan_inclusive_add(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} + +int test_work_group_scan_inclusive_max(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + 
run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} + +int test_work_group_scan_inclusive_min(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} + +int test_work_group_scan_exclusive_add(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} + +int test_work_group_scan_exclusive_max(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} + +int test_work_group_scan_exclusive_min(cl_device_id device, cl_context context, + cl_command_queue queue, int n_elems) +{ + int result = TEST_PASS; + + result |= + run_test>>(device, context, queue, n_elems); + result |= + run_test>>(device, context, queue, n_elems); + + if (gHasLong) + { + result |= run_test>>(device, context, queue, + n_elems); + result |= run_test>>(device, context, queue, + n_elems); + } + + return result; +} -- cgit v1.2.3 From 
7a0e7e767a1a33e7b7c10954b4a106dedf316e00 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 12 Apr 2022 17:42:55 +0100 Subject: Test all cluster sizes for cl_khr_subgroup_clustered_reduce (#1408) Signed-off-by: Stuart Brady --- .../subgroups/test_subgroup_clustered_reduce.cpp | 39 ++++++++++++++-------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index 527be5ad..b016bf99 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -18,19 +18,29 @@ #include "subgroup_common_templates.h" #include "harness/typeWrappers.h" -#define CLUSTER_SIZE 4 -#define CLUSTER_SIZE_STR "4" - namespace { std::string sub_group_clustered_reduce_source = R"( -__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out) { +__kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, + uint cluster_size) { + Type r; int gid = get_global_id(0); XY(xy,gid); xy[gid].w = 0; - if (sizeof(in[gid]) == sizeof(%s(in[gid], )" CLUSTER_SIZE_STR R"())) { + Type v = in[gid]; + if (sizeof(in[gid]) == sizeof(%s(v, 1))) { xy[gid].w = sizeof(in[gid]); } - out[gid] = %s(in[gid], )" CLUSTER_SIZE_STR R"(); + switch (cluster_size) { + case 1: r = %s(v, 1); break; + case 2: r = %s(v, 2); break; + case 4: r = %s(v, 4); break; + case 8: r = %s(v, 8); break; + case 16: r = %s(v, 16); break; + case 32: r = %s(v, 32); break; + case 64: r = %s(v, 64); break; + case 128: r = %s(v, 128); break; + } + out[gid] = r; } )"; @@ -94,32 +104,33 @@ template struct RED_CLU int n = ii + ns > nw ? 
nw - ii : ns; int midx = 4 * ii + 2; std::vector clusters_results; - int clusters_counter = ns / CLUSTER_SIZE; + int clusters_counter = ns / test_params.cluster_size; clusters_results.resize(clusters_counter); // Compute target Ty tr = mx[ii]; for (int i = 0; i < n; ++i) { - if (i % CLUSTER_SIZE == 0) + if (i % test_params.cluster_size == 0) tr = mx[ii + i]; else tr = calculate(tr, mx[ii + i], operation); - clusters_results[i / CLUSTER_SIZE] = tr; + clusters_results[i / test_params.cluster_size] = tr; } // Check result for (int i = 0; i < n; ++i) { Ty rr = my[ii + i]; - tr = clusters_results[i / CLUSTER_SIZE]; + tr = clusters_results[i / test_params.cluster_size]; if (!compare(rr, tr)) { log_error( - "ERROR: sub_group_clustered_reduce_%s(%s) mismatch " - "for local id %d in sub group %d in group %d\n", + "ERROR: sub_group_clustered_reduce_%s(%s, %u) " + "mismatch for local id %d in sub group %d in group " + "%d\n", operation_names(operation), TypeManager::name(), - i, j, k); + test_params.cluster_size, i, j, k); return TEST_FAIL; } } @@ -184,7 +195,7 @@ int test_subgroup_functions_clustered_reduce(cl_device_id device, constexpr size_t global_work_size = 2000; constexpr size_t local_work_size = 200; - WorkGroupParams test_params(global_work_size, local_work_size); + WorkGroupParams test_params(global_work_size, local_work_size, -1, 3); test_params.save_kernel_source(sub_group_clustered_reduce_source); RunTestForType rft(device, context, queue, num_elements, test_params); -- cgit v1.2.3 From d533472c27995bb97ed5caab69eca90dd9e5a4ea Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Tue, 19 Apr 2022 18:55:03 +0200 Subject: Fix incorrect use image channel data type and filtering mode (#1375) --- test_conformance/spir/sampler_enumeration.zip | Bin 63216 -> 67926 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/test_conformance/spir/sampler_enumeration.zip b/test_conformance/spir/sampler_enumeration.zip index 5f8a7a06..ab9c9a56 100644 Binary files 
a/test_conformance/spir/sampler_enumeration.zip and b/test_conformance/spir/sampler_enumeration.zip differ -- cgit v1.2.3 From 03da14d6a9a1525cc585f256404bbfc79ccc0e44 Mon Sep 17 00:00:00 2001 From: Jim Lewis Date: Tue, 19 Apr 2022 11:57:15 -0500 Subject: Fix clang 10 build errors (#1387) * Fix clang 10 build errors Lossy casts due to inexact float representation of CL_INT_MAX * Fix clang format * Remove implicit-const-int-float-conversion flag --- CMakeLists.txt | 1 - test_common/harness/imageHelpers.cpp | 21 +++++++-------------- test_common/harness/imageHelpers.h | 2 +- 3 files changed, 8 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b307a11..8f5f4472 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,7 +94,6 @@ if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang" add_cxx_flag_if_supported(-Wno-error=cpp) # Allow #warning directive add_cxx_flag_if_supported(-Wno-error=unknown-pragmas) # Issue #785 add_cxx_flag_if_supported(-Wno-error=asm-operand-widths) # Issue #784 - add_cxx_flag_if_supported(-Wno-error=implicit-const-int-float-conversion) # Issue #1250 # -msse -mfpmath=sse to force gcc to use sse for float math, # avoiding excess precision problems that cause tests like int2float diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index 3a5c5533..c380c1f3 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -2624,11 +2624,11 @@ void pack_image_pixel(int *srcVector, const cl_image_format *imageFormat, } } -int round_to_even(float v) +cl_int round_to_even(float v) { // clamp overflow - if (v >= -(float)INT_MIN) return INT_MAX; - if (v <= (float)INT_MIN) return INT_MIN; + if (v >= -(float)CL_INT_MIN) return CL_INT_MAX; + if (v <= (float)CL_INT_MIN) return CL_INT_MIN; // round fractional values to integer value if (fabsf(v) < MAKE_HEX_FLOAT(0x1.0p23f, 0x1L, 23)) @@ -2640,7 +2640,7 @@ int round_to_even(float v) v -= 
magicVal; } - return (int)v; + return (cl_int)v; } void pack_image_pixel(float *srcVector, const cl_image_format *imageFormat, @@ -2765,10 +2765,7 @@ void pack_image_pixel(float *srcVector, const cl_image_format *imageFormat, case CL_SIGNED_INT32: { cl_int *ptr = (cl_int *)outData; for (unsigned int i = 0; i < channelCount; i++) - ptr[i] = (int)CONVERT_INT( - srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31), - MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23), - CL_INT_MAX); + ptr[i] = round_to_even(srcVector[i]); break; } case CL_UNSIGNED_INT8: { @@ -2932,12 +2929,8 @@ void pack_image_pixel_error(const float *srcVector, case CL_SIGNED_INT32: { const cl_int *ptr = (const cl_int *)results; for (unsigned int i = 0; i < channelCount; i++) - errors[i] = (cl_float)( - (cl_long)ptr[i] - - (cl_long)CONVERT_INT( - srcVector[i], MAKE_HEX_FLOAT(-0x1.0p31f, -1, 31), - MAKE_HEX_FLOAT(0x1.fffffep30f, 0x1fffffe, 30 - 23), - CL_INT_MAX)); + errors[i] = (cl_float)((cl_long)ptr[i] + - (cl_long)round_to_even(srcVector[i])); break; } case CL_UNSIGNED_INT8: { diff --git a/test_common/harness/imageHelpers.h b/test_common/harness/imageHelpers.h index e728a939..2cc8e68e 100644 --- a/test_common/harness/imageHelpers.h +++ b/test_common/harness/imageHelpers.h @@ -63,7 +63,7 @@ typedef struct bool normalized_coords; } image_sampler_data; -int round_to_even(float v); +cl_int round_to_even(float v); #define NORMALIZE(v, max) (v < 0 ? 0 : (v > 1.f ? max : round_to_even(v * max))) #define NORMALIZE_UNROUNDED(v, max) (v < 0 ? 0 : (v > 1.f ? 
max : v * max)) -- cgit v1.2.3 From 13d1b01f65d106c3462bd3d5222780c3fcf097ea Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Fri, 22 Apr 2022 04:51:32 -0500 Subject: test_basic/enqueue_map: Initialize all the data (#1417) --- test_conformance/basic/test_enqueue_map.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/basic/test_enqueue_map.cpp b/test_conformance/basic/test_enqueue_map.cpp index 3702726f..d28f7e41 100644 --- a/test_conformance/basic/test_enqueue_map.cpp +++ b/test_conformance/basic/test_enqueue_map.cpp @@ -146,7 +146,7 @@ int test_enqueue_map_image(cl_device_id deviceID, cl_context context, cl_command clMemWrapper memObject; log_info("Testing with cl_mem_flags src: %s\n", flag_set_names[src_flag_id]); - generate_random_data(kUInt, (unsigned int)(imageSize * imageSize), d, + generate_random_data(kUInt, (unsigned int)(imageSize * imageSize * 4), d, hostPtrData); memcpy(referenceData, hostPtrData, imageDataSize); -- cgit v1.2.3 From 35c21a8e06f94ffd84bdfe0f94a2aa0deb3d1013 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Date: Thu, 28 Apr 2022 23:46:52 +0200 Subject: imageHelpers: add CL_UNORM_SHORT_{555, 565} in get_max_absolute_error (#1406) * imageHelpers: add CL_UNORM_SHORT_{555, 565} in get_max_absolute_error Working on a device supporting CL_UNORM_SHORT_565 image data type, I noticed that the max absolute error authorized was not the right one for such image data type. Also because of normalization, there is always an absolute error authorized whatever the filtering of the sampler. 
Ref #1140 * put back if statement on filter_mode --- test_common/harness/imageHelpers.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index c380c1f3..a254c48f 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -924,6 +924,8 @@ float get_max_absolute_error(const cl_image_format *format, #ifdef CL_SFIXED14_APPLE case CL_SFIXED14_APPLE: return 0x1.0p-14f; #endif + case CL_UNORM_SHORT_555: + case CL_UNORM_SHORT_565: return 1.0f / 31.0f; default: return 0.0f; } } -- cgit v1.2.3 From 5d6ca3e9d1374ef32644847c1eefeb503a27b732 Mon Sep 17 00:00:00 2001 From: Jeremy Kemp Date: Thu, 28 Apr 2022 23:34:08 +0100 Subject: Change memory order and scope for atomics that gate final results being stored. (#1377) * Change memory order and scope for atomics that gate final results being stored. memory_order_acq_rel with memory_scope_device is now used to guarantee that the correct memory consistency is observed before final results are stored. Previously it was possible for kernels to be generated that all used relaxed memory ordering, which could lead to false-positive failures. Fixes #1370 * Disable atomics tests with global, in-program atomics. If the device does not support `memory_order_relaxed` or `memory_scope_device`, disable atomics tests that declare their atomics in-program with global memory. There is now an implicit requirement to support `memory_order_relaxed` and `memory_scope_device` for these tests. * Fix misplaced parentheses. 
* Change memory scope for atomic fetch and load calls in kernel Change the memory scope from memory_scope_work_group to memory_scope_device so the ordering applies across all work items Co-authored-by: Sreelakshmi Haridas --- test_conformance/c11_atomics/common.h | 51 +++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index d30259f0..42fe32b6 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -1031,20 +1031,40 @@ CBasicTest::KernelCode(cl_uint maxNumDestItems) } code += "\n"; } - if (LocalMemory() || DeclaredInProgram()) + if (LocalMemory()) { code += " // Copy final values to host reachable buffer\n"; - if (LocalMemory()) - code += " barrier(CLK_LOCAL_MEM_FENCE);\n" - " if(get_local_id(0) == 0) // first thread in workgroup\n"; + code += " barrier(CLK_LOCAL_MEM_FENCE);\n" + " if(get_local_id(0) == 0) // first thread in workgroup\n"; + code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; " + "dstItemIdx++)\n"; + if (aTypeName == "atomic_flag") + { + code += R"( + finalDest[dstItemIdx] = + atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, + memory_order_relaxed, + memory_scope_work_group);)"; + } else - // global atomics declared in program scope + { code += R"( - if(atomic_fetch_add_explicit(&finishedThreads, 1u, - memory_order_relaxed, - memory_scope_work_group) + finalDest[dstItemIdx] = + atomic_load_explicit(destMemory+dstItemIdx, + memory_order_relaxed, + memory_scope_work_group);)"; + } + } + else if (DeclaredInProgram()) + { + // global atomics declared in program scope + code += " // Copy final values to host reachable buffer\n"; + code += R"( + if(atomic_fetch_add_explicit(&finishedThreads, 1u, + memory_order_acq_rel, + memory_scope_device) == get_global_size(0)-1) // last finished thread - )"; + )"; code += " for(uint dstItemIdx = 0; dstItemIdx < numDestItems; " 
"dstItemIdx++)\n"; if (aTypeName == "atomic_flag") @@ -1053,7 +1073,7 @@ CBasicTest::KernelCode(cl_uint maxNumDestItems) finalDest[dstItemIdx] = atomic_flag_test_and_set_explicit(destMemory+dstItemIdx, memory_order_relaxed, - memory_scope_work_group);)"; + memory_scope_device);)"; } else { @@ -1061,7 +1081,7 @@ CBasicTest::KernelCode(cl_uint maxNumDestItems) finalDest[dstItemIdx] = atomic_load_explicit(destMemory+dstItemIdx, memory_order_relaxed, - memory_scope_work_group);)"; + memory_scope_device);)"; } } code += "}\n" @@ -1108,6 +1128,15 @@ int CBasicTest::ExecuteSingleTest( log_info("\t\tTest disabled\n"); return 0; } + if (!LocalMemory() && DeclaredInProgram()) + { + if (((gAtomicMemCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE) == 0) + || ((gAtomicMemCap & CL_DEVICE_ATOMIC_ORDER_ACQ_REL) == 0)) + { + log_info("\t\tTest disabled\n"); + return 0; + } + } // set up work sizes based on device capabilities and test configuration error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, -- cgit v1.2.3 From 3662d1744778e333fd593312cb9083a245fc44d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Fri, 29 Apr 2022 18:42:27 +0100 Subject: Update Github Actions CI and add Windows (#1413) - Add one Windows build to Github Actions - Remove Appveyor config - Move a few build steps out of the script - Use Ninja as the generator (makes for more readable logs) - Add build cache (except on Windows where it seems to break) Change-Id: Ida90ee1842af98aff86e5144ab7b9766480378c9 Signed-off-by: Kevin Petit --- .appveyor.yml | 54 ----------------------------------------- .github/workflows/presubmit.yml | 29 +++++++++++++++++++--- presubmit.sh | 31 +++++++++++------------ 3 files changed, 41 insertions(+), 73 deletions(-) delete mode 100644 .appveyor.yml diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index ea010778..00000000 --- a/.appveyor.yml +++ /dev/null @@ -1,54 +0,0 @@ -os: - - Visual Studio 2017 - -shallow_clone: true - -platform: - - Win32 
- - x64 - -configuration: - - Release - -environment: - matrix: - - SETARCH: i686 - - SETARCH: x86_64 - -matrix: - exclude: - - platform: Win32 - SETARCH: x86_64 - - platform: x64 - SETARCH: i686 - -before_build: - # Setup environment: - - ps: $env:TOP = $env:APPVEYOR_BUILD_FOLDER - - ps: $env:TOP - - echo %TOP% - # Get the OpenCL Headers: - - git clone --depth=1 https://github.com/KhronosGroup/OpenCL-Headers OpenCL-Headers - # Get and build the OpenCL ICD Loader: - - git clone --depth=1 https://github.com/KhronosGroup/OpenCL-ICD-Loader.git - - ps: cd OpenCL-ICD-Loader - - ps: mkdir build - - ps: cd build - - cmake -A%PLATFORM% -DENABLE_OPENCL30_PROVISIONAL=1 -DOPENCL_ICD_LOADER_HEADERS_DIR=%TOP%/OpenCL-Headers/ .. - - cmake --build . --config %CONFIGURATION% - - ps: cd $env:TOP - # Get the libclcxx standard library: - - git clone --depth=1 https://github.com/KhronosGroup/libclcxx.git libclcxx - # Generate the CTS solution file: - - cmake -DCL_INCLUDE_DIR=%TOP%/OpenCL-Headers - -DCL_LIB_DIR=%TOP%/OpenCL-ICD-Loader/build - -DCL_LIBCLCXX_DIR=%TOP%/libclcxx - -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin - -DOPENCL_LIBRARIES="OpenCL" - -H. 
-Bbuild_win -A%PLATFORM% - -DD3D10_IS_SUPPORTED=ON -DD3D11_IS_SUPPORTED=ON -DARCH=%SETARCH% - -build: - project: build_win\CLConform.sln - parallel: true - verbosity: normal diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index 2aedc199..bac4ceba 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -3,30 +3,51 @@ on: [push, pull_request] jobs: build: - name: Build ${{ matrix.os }} ${{ matrix.name }} + name: Build ${{ matrix.os }} ${{ matrix.arch }} runs-on: ${{ matrix.os }} env: JOB_ARCHITECTURE: ${{ matrix.arch }} JOB_ENABLE_GL: ${{ matrix.gl }} strategy: + fail-fast: false matrix: mainmatrix: [true] - os: [ubuntu-20.04, macos-latest] + os: [ubuntu-20.04, macos-latest, windows-latest] include: - os: ubuntu-20.04 mainmatrix: true gl: 1 - os: ubuntu-20.04 mainmatrix: false - name: Arm arch: arm - os: ubuntu-20.04 mainmatrix: false - name: AArch64 arch: aarch64 steps: - uses: actions/checkout@v2 + - name: Setup Ninja + uses: seanmiddleditch/gha-setup-ninja@master + - name: Setup OpenGL build dependencies + if: ${{ matrix.gl }} + run: | + sudo apt-get update + sudo apt-get -y install libglu1-mesa-dev freeglut3-dev mesa-common-dev libglew-dev + - name: Setup MSVC with Ninja + uses: ilammy/msvc-dev-cmd@v1 + - name: Setup ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + variant: sccache + key: ${{ matrix.os }}-${{ matrix.arch }} + - name: Fetch OpenCL Headers + shell: bash + run: | + git clone https://github.com/KhronosGroup/OpenCL-Headers.git + cd OpenCL-Headers + ln -s CL OpenCL # For OSX builds + cd .. 
- name: Build + shell: bash run: ./presubmit.sh formatcheck: name: Check code format diff --git a/presubmit.sh b/presubmit.sh index 6fc037c8..b63a4373 100755 --- a/presubmit.sh +++ b/presubmit.sh @@ -15,7 +15,7 @@ touch ${TOOLCHAIN_FILE} BUILD_OPENGL_TEST="OFF" # Prepare toolchain if needed -if [[ ${JOB_ARCHITECTURE} != "" ]]; then +if [[ ${JOB_ARCHITECTURE} != "" && ${RUNNER_OS} != "Windows" ]]; then TOOLCHAIN_URL_VAR=TOOLCHAIN_URL_${JOB_ARCHITECTURE} TOOLCHAIN_URL=${!TOOLCHAIN_URL_VAR} wget ${TOOLCHAIN_URL} @@ -38,35 +38,36 @@ fi if [[ ( ${JOB_ARCHITECTURE} == "" && ${JOB_ENABLE_GL} == "1" ) ]]; then BUILD_OPENGL_TEST="ON" - sudo apt-get update - sudo apt-get -y install libglu1-mesa-dev freeglut3-dev mesa-common-dev libglew-dev fi -# Prepare headers -git clone https://github.com/KhronosGroup/OpenCL-Headers.git -cd OpenCL-Headers -ln -s CL OpenCL # For OSX builds -cd .. # Get and build loader git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git cd ${TOP}/OpenCL-ICD-Loader mkdir build cd build -cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ .. -make +cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ +cmake --build . -j2 --config Release # Build CTS cd ${TOP} ls -l mkdir build cd build -cmake -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \ +if [[ ${RUNNER_OS} == "Windows" ]]; then + CMAKE_OPENCL_LIBRARIES_OPTION="OpenCL" + CMAKE_CACHE_OPTIONS="" +else + CMAKE_OPENCL_LIBRARIES_OPTION="-lOpenCL -lpthread" + CMAKE_CACHE_OPTIONS="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache" +fi +cmake .. 
-G Ninja \ + ${CMAKE_CACHE_OPTIONS} \ + -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \ -DCL_LIB_DIR=${TOP}/OpenCL-ICD-Loader/build \ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \ - -DOPENCL_LIBRARIES="-lOpenCL -lpthread" \ + -DOPENCL_LIBRARIES="${CMAKE_OPENCL_LIBRARIES_OPTION}" \ -DUSE_CL_EXPERIMENTAL=ON \ - -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} \ - .. -make -j2 + -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} +cmake --build . -j3 --config Release -- cgit v1.2.3 From 5149de22777158936b8c078a234206e1ffcfbde6 Mon Sep 17 00:00:00 2001 From: Karol Herbst Date: Tue, 17 May 2022 17:51:10 +0200 Subject: api/kernel_arg_info: Check for read_write image support before testing it (#1420) Code taken from api/test_min_image_formats.cpp --- test_conformance/api/test_kernel_arg_info.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp index dddb4a23..28825f10 100644 --- a/test_conformance/api/test_kernel_arg_info.cpp +++ b/test_conformance/api/test_kernel_arg_info.cpp @@ -814,8 +814,34 @@ static int run_image_tests(cl_context context, cl_device_id deviceID) cl_kernel_arg_address_qualifier address_qualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL; + Version version = get_device_cl_version(deviceID); + bool supports_read_write_images = false; + if (version >= Version(3, 0)) + { + cl_uint maxReadWriteImageArgs = 0; + cl_int error = clGetDeviceInfo( + deviceID, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, + sizeof(maxReadWriteImageArgs), &maxReadWriteImageArgs, NULL); + test_error(error, + "Unable to query " + "CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS"); + + // read-write images are supported if MAX_READ_WRITE_IMAGE_ARGS is + // nonzero + supports_read_write_images = maxReadWriteImageArgs != 0; + } + else if (version >= Version(2, 0)) + { + // read-write images are required for OpenCL 2.x + supports_read_write_images = true; + } + for (auto access_qualifier : 
access_qualifiers) { + if (access_qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE + && !supports_read_write_images) + continue; + bool is_write = (access_qualifier == CL_KERNEL_ARG_ACCESS_WRITE_ONLY || access_qualifier == CL_KERNEL_ARG_ACCESS_READ_WRITE); -- cgit v1.2.3 From 6e6249fb489afbdc628e3d412aed9199ed006d48 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 17 May 2022 10:51:53 -0500 Subject: images: Stop checking gDeviceType != CL_DEVICE_TYPE_GPU (#1418) * images: Stop checking gDeviceType != CL_DEVICE_TYPE_GPU If the device type also advertises CL_DEVICE_TYPE_DEFAULT (which should be valid), this causes it to be considered a CPU device and the tests enforce different precision and rounding expectations. * Fix clang-format * Drop redundant NORM_OFFSET checks --- .../images/kernel_read_write/test_common.cpp | 10 ++-- .../images/kernel_read_write/test_iterations.cpp | 68 ++++++++++++++++------ .../images/kernel_read_write/test_loops.cpp | 2 +- .../images/kernel_read_write/test_read_1D.cpp | 48 +++++++++++---- .../kernel_read_write/test_read_1D_array.cpp | 52 ++++++++++++----- .../kernel_read_write/test_read_2D_array.cpp | 55 ++++++++++++----- 6 files changed, 169 insertions(+), 66 deletions(-) diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index 6b3cf849..62bd4ab1 100644 --- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -557,7 +557,7 @@ int test_read_image(cl_context context, cl_command_queue queue, // Apple requires its CPU implementation to do // correctly rounded address arithmetic in all // modes - || gDeviceType != CL_DEVICE_TYPE_GPU + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif ) offset = 0.0f; // Loop only once @@ -875,7 +875,7 @@ int test_read_image(cl_context context, cl_command_queue queue, // Apple requires its CPU implementation to do // correctly rounded address arithmetic in 
all // modes - || gDeviceType != CL_DEVICE_TYPE_GPU + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif ) offset = 0.0f; // Loop only once @@ -1214,7 +1214,8 @@ int test_read_image(cl_context context, cl_command_queue queue, // offsets (0.0, 0.0) E.g., test one // pixel. if (!imageSampler->normalized_coords - || gDeviceType != CL_DEVICE_TYPE_GPU + || !(gDeviceType + & CL_DEVICE_TYPE_GPU) || NORM_OFFSET == 0) { norm_offset_x = 0.0f; @@ -1396,7 +1397,8 @@ int test_read_image(cl_context context, cl_command_queue queue, // offsets (0.0, 0.0) E.g., test one // pixel. if (!imageSampler->normalized_coords - || gDeviceType != CL_DEVICE_TYPE_GPU + || !(gDeviceType + & CL_DEVICE_TYPE_GPU) || NORM_OFFSET == 0) { norm_offset_x = 0.0f; diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp index 3b779fab..2f5c75a7 100644 --- a/test_conformance/images/kernel_read_write/test_iterations.cpp +++ b/test_conformance/images/kernel_read_write/test_iterations.cpp @@ -415,12 +415,15 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, doubl int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { @@ -474,7 +477,10 @@ int validate_image_2D_depth_results(void *imageValues, void *resultValues, 
doubl // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -569,12 +575,15 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { @@ -658,7 +667,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -778,7 +790,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -813,7 +828,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -874,7 +892,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -909,7 +930,10 @@ int validate_image_2D_results(void *imageValues, void *resultValues, double form // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -975,12 +999,15 @@ int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { @@ -1054,7 +1081,10 @@ int validate_image_2D_sRGB_results(void *imageValues, void *resultValues, double // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; diff --git a/test_conformance/images/kernel_read_write/test_loops.cpp b/test_conformance/images/kernel_read_write/test_loops.cpp index 795a9eda..ea1e1c7c 100644 --- a/test_conformance/images/kernel_read_write/test_loops.cpp +++ b/test_conformance/images/kernel_read_write/test_loops.cpp @@ -84,7 +84,7 @@ int test_read_image_type(cl_device_id device, cl_context context, // of operations for linear filtering on the GPU. We do not test linear // filtering for the CL_RGB CL_UNORM_INT_101010 image format; however, we // test it internally for a set of other image formats. - if ((gDeviceType == CL_DEVICE_TYPE_GPU) + if ((gDeviceType & CL_DEVICE_TYPE_GPU) && (imageSampler->filter_mode == CL_FILTER_LINEAR) && (format->image_channel_order == CL_RGB) && (format->image_channel_data_type == CL_UNORM_INT_101010)) diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp index 68113f9a..e9306fc4 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp @@ -487,10 +487,13 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do 
correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif ) offset = 0.0f; // Loop only once @@ -553,7 +556,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } @@ -646,10 +652,13 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif ) offset = 0.0f; // Loop only once @@ -720,7 +729,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } @@ -826,7 +838,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } @@ -857,7 +872,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } @@ -913,7 +931,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } @@ -944,7 +965,10 @@ int test_read_image_1D( cl_context context, cl_command_queue queue, cl_kernel ke // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; checkOnlyOnePixel = 1; } diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp index ac266ad7..2f4e4d3b 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp @@ -578,12 +578,15 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { @@ -647,7 +650,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -746,12 +752,15 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do correctly + // rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel; norm_offset_x += NORM_OFFSET) { @@ -824,7 +833,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -935,7 +947,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -965,7 +980,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -1021,7 +1039,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; @@ -1051,7 +1072,10 @@ int test_read_image_1D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; checkOnlyOnePixel = 1; diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp index 11b78814..d71bfec4 100644 --- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp @@ -597,12 +597,15 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do + // correctly rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { @@ -738,12 +741,15 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || 
gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do + // correctly rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { @@ -915,12 +921,15 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker int checkOnlyOnePixel = 0; int found_pixel = 0; float offset = NORM_OFFSET; - if (!imageSampler->normalized_coords || imageSampler->filter_mode != CL_FILTER_NEAREST || NORM_OFFSET == 0 + if (!imageSampler->normalized_coords + || imageSampler->filter_mode != CL_FILTER_NEAREST + || NORM_OFFSET == 0 #if defined( __APPLE__ ) - // Apple requires its CPU implementation to do correctly rounded address arithmetic in all modes - || gDeviceType != CL_DEVICE_TYPE_GPU + // Apple requires its CPU implementation to do + // correctly rounded address arithmetic in all modes + || !(gDeviceType & CL_DEVICE_TYPE_GPU) #endif - ) + ) offset = 0.0f; // Loop only once for (float norm_offset_x = -offset; norm_offset_x <= offset && !found_pixel ; norm_offset_x += NORM_OFFSET) { @@ -1108,7 +1117,10 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; norm_offset_z = 0.0f; @@ -1147,7 +1159,11 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. 
- if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType + & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; norm_offset_z = 0.0f; @@ -1216,7 +1232,10 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; norm_offset_z = 0.0f; @@ -1255,7 +1274,11 @@ int test_read_image_2D_array( cl_context context, cl_command_queue queue, cl_ker // If we are not on a GPU, or we are not normalized, then only test with offsets (0.0, 0.0) // E.g., test one pixel. - if (!imageSampler->normalized_coords || gDeviceType != CL_DEVICE_TYPE_GPU || NORM_OFFSET == 0 || NORM_OFFSET == 0 || NORM_OFFSET == 0) { + if (!imageSampler->normalized_coords + || !(gDeviceType + & CL_DEVICE_TYPE_GPU) + || NORM_OFFSET == 0) + { norm_offset_x = 0.0f; norm_offset_y = 0.0f; norm_offset_z = 0.0f; -- cgit v1.2.3 From d54954c7cfd4311d12d076b205ee632b0d6cc151 Mon Sep 17 00:00:00 2001 From: Jeremy Kemp Date: Tue, 17 May 2022 16:52:40 +0100 Subject: Enable mipmap extension pragmas (#1349) * Enable mipmap pragmas where appropriate. * clang-format changes.
--- .../images/kernel_read_write/test_iterations.cpp | 56 +++++++++++--------- .../images/kernel_read_write/test_read_1D.cpp | 50 +++++++++--------- .../kernel_read_write/test_read_1D_array.cpp | 54 +++++++++++--------- .../kernel_read_write/test_read_2D_array.cpp | 59 ++++++++++++---------- .../images/kernel_read_write/test_read_3D.cpp | 59 +++++++++++++--------- .../images/kernel_read_write/test_write_1D.cpp | 42 ++++++++------- .../kernel_read_write/test_write_1D_array.cpp | 44 +++++++++------- .../kernel_read_write/test_write_2D_array.cpp | 54 ++++++++++++-------- .../images/kernel_read_write/test_write_3D.cpp | 53 +++++++++++-------- .../images/kernel_read_write/test_write_image.cpp | 50 ++++++++++-------- 10 files changed, 296 insertions(+), 225 deletions(-) diff --git a/test_conformance/images/kernel_read_write/test_iterations.cpp b/test_conformance/images/kernel_read_write/test_iterations.cpp index 2f5c75a7..05aed02c 100644 --- a/test_conformance/images/kernel_read_write/test_iterations.cpp +++ b/test_conformance/images/kernel_read_write/test_iterations.cpp @@ -39,24 +39,28 @@ static size_t reduceImageSizeRange(size_t maxDimSize) { } const char *read2DKernelSourcePattern = -"__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, imageSampler, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_only %s input,%s __global float " + "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, imageSampler, coords %s);\n" + "}"; const char *read_write2DKernelSourcePattern = -"__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global %s%s *results %s)\n" 
-"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_write %s input,%s __global float " + "*xOffsets, __global float *yOffsets, __global %s%s *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, coords %s);\n" + "}"; const char *intCoordKernelSource = " int2 coords = (int2)( xOffsets[offset], yOffsets[offset]);\n"; @@ -1691,16 +1695,18 @@ int test_read_image_set_2D(cl_device_id device, cl_context context, } - sprintf( programSrc, KernelSourcePattern, - (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t", - samplerArg, get_explicit_type_name( outputType ), + sprintf(programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable" + : "", + (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" + : "image2d_t", + samplerArg, get_explicit_type_name(outputType), (format->image_channel_order == CL_DEPTH) ? "" : "4", - gTestMipmaps?", float lod":" ", - samplerVar, - gTestMipmaps? lodOffsetSource : offsetSource, - floatCoords ? floatKernelSource : intCoordKernelSource, - readFormat, - gTestMipmaps?", lod":" "); + gTestMipmaps ? ", float lod" : " ", samplerVar, + gTestMipmaps ? lodOffsetSource : offsetSource, + floatCoords ? floatKernelSource : intCoordKernelSource, readFormat, + gTestMipmaps ? 
", lod" : " "); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_read_1D.cpp b/test_conformance/images/kernel_read_write/test_read_1D.cpp index e9306fc4..2a722088 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D.cpp @@ -26,24 +26,28 @@ #endif const char *read1DKernelSourcePattern = -"__kernel void sample_kernel( read_only image1d_t input,%s __global float *xOffsets, __global %s4 *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0);\n" -" int offset = tidX;\n" -"%s" -" results[offset] = read_image%s( input, imageSampler, coord %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_only image1d_t input,%s __global float " + "*xOffsets, __global %s4 *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0);\n" + " int offset = tidX;\n" + "%s" + " results[offset] = read_image%s( input, imageSampler, coord %s);\n" + "}"; const char *read_write1DKernelSourcePattern = -"__kernel void sample_kernel( read_write image1d_t input,%s __global float *xOffsets, __global %s4 *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0);\n" -" int offset = tidX;\n" -"%s" -" results[offset] = read_image%s( input, coord %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_write image1d_t input,%s __global float " + "*xOffsets, __global %s4 *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0);\n" + " int offset = tidX;\n" + "%s" + " results[offset] = read_image%s( input, coord %s);\n" + "}"; const char *int1DCoordKernelSource = " int coord = xOffsets[offset];\n"; @@ -1075,14 +1079,14 @@ int test_read_image_set_1D(cl_device_id device, cl_context context, { KernelSourcePattern = read1DKernelSourcePattern; } - sprintf( programSrc, - KernelSourcePattern, - samplerArg, get_explicit_type_name( outputType ), - gTestMipmaps ? 
", float lod" : "", - samplerVar, + sprintf(programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable" + : "", + samplerArg, get_explicit_type_name(outputType), + gTestMipmaps ? ", float lod" : "", samplerVar, floatCoords ? float1DKernelSource : int1DCoordKernelSource, - readFormat, - gTestMipmaps ? ", lod" : "" ); + readFormat, gTestMipmaps ? ", lod" : ""); ptr = programSrc; diff --git a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp index 2f4e4d3b..a8009420 100644 --- a/test_conformance/images/kernel_read_write/test_read_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_1D_array.cpp @@ -25,24 +25,28 @@ #endif const char *read1DArrayKernelSourcePattern = -"__kernel void sample_kernel( read_only image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, imageSampler, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_only image1d_array_t input,%s __global " + "float *xOffsets, __global float *yOffsets, __global %s4 *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, imageSampler, coords %s);\n" + "}"; const char *read_write1DArrayKernelSourcePattern = -"__kernel void sample_kernel( read_write image1d_array_t input,%s __global float *xOffsets, __global float *yOffsets, __global %s4 *results %s )\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_write image1d_array_t input,%s __global " + "float *xOffsets, __global float *yOffsets, __global %s4 *results %s )\n" + "{\n" + "%s" + " int tidX = 
get_global_id(0), tidY = get_global_id(1);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, coords %s);\n" + "}"; const char *offset1DArrayKernelSource = " int offset = tidY*get_image_width(input) + tidX;\n"; @@ -1180,15 +1184,15 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context, KernelSourcePattern = read_write1DArrayKernelSourcePattern; } - sprintf( programSrc, - KernelSourcePattern, - samplerArg, get_explicit_type_name( outputType ), - gTestMipmaps ? ", float lod" : "", - samplerVar, - gTestMipmaps ? offset1DArrayLodKernelSource : offset1DArrayKernelSource, - floatCoords ? floatKernelSource1DArray : intCoordKernelSource1DArray, - readFormat, - gTestMipmaps ? ", lod" : "" ); + sprintf( + programSrc, KernelSourcePattern, + gTestMipmaps ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable" + : "", + samplerArg, get_explicit_type_name(outputType), + gTestMipmaps ? ", float lod" : "", samplerVar, + gTestMipmaps ? offset1DArrayLodKernelSource : offset1DArrayKernelSource, + floatCoords ? floatKernelSource1DArray : intCoordKernelSource1DArray, + readFormat, gTestMipmaps ? 
", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp index d71bfec4..533a0fe8 100644 --- a/test_conformance/images/kernel_read_write/test_read_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_read_2D_array.cpp @@ -41,24 +41,32 @@ static size_t reduceImageDepth(size_t maxDepth) { } const char *read2DArrayKernelSourcePattern = -"__kernel void sample_kernel( read_only %s input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s%s *results %s )\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, imageSampler, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_only %s input,%s __global float " + "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global " + "%s%s *results %s )\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, imageSampler, coords %s);\n" + "}"; const char *read_write2DArrayKernelSourcePattern = -"__kernel void sample_kernel( read_write %s input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s%s *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_write %s input,%s __global float " + "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global " + "%s%s *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + "%s" + " results[offset] = read_image%s( 
input, coords %s);\n" + "}"; const char* offset2DarraySource =" int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n"; const char* offset2DarraySourceLod = @@ -1412,17 +1420,16 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context, } // Construct the source - sprintf( programSrc, - KernelSourcePattern, - imageType, - samplerArg, get_explicit_type_name( outputType ), - imageElement, - gTestMipmaps ? ", float lod" : " ", - samplerVar, + sprintf(programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable" + : "", + imageType, samplerArg, get_explicit_type_name(outputType), + imageElement, gTestMipmaps ? ", float lod" : " ", samplerVar, gTestMipmaps ? offset2DarraySourceLod : offset2DarraySource, - floatCoords ? float2DArrayUnnormalizedCoordKernelSource : int2DArrayCoordKernelSource, - readFormat, - gTestMipmaps ? ", lod" : " " ); + floatCoords ? float2DArrayUnnormalizedCoordKernelSource + : int2DArrayCoordKernelSource, + readFormat, gTestMipmaps ? 
", lod" : " "); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_read_3D.cpp b/test_conformance/images/kernel_read_write/test_read_3D.cpp index 860114fb..cec77bf0 100644 --- a/test_conformance/images/kernel_read_write/test_read_3D.cpp +++ b/test_conformance/images/kernel_read_write/test_read_3D.cpp @@ -36,24 +36,32 @@ static size_t reduceImageDepth(size_t maxDimSize, RandomSeed& seed) { const char *read3DKernelSourcePattern = -"__kernel void sample_kernel( read_only image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, imageSampler, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_only image3d_t input,%s __global float " + "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global " + "%s4 *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + "%s" + " results[offset] = read_image%s( input, imageSampler, coords %s);\n" + "}"; const char *read_write3DKernelSourcePattern = -"__kernel void sample_kernel( read_write image3d_t input,%s __global float *xOffsets, __global float *yOffsets, __global float *zOffsets, __global %s4 *results %s)\n" -"{\n" -"%s" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -"%s" -" results[offset] = read_image%s( input, coords %s);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( read_write image3d_t input,%s __global float " + "*xOffsets, __global float *yOffsets, __global float *zOffsets, __global " + "%s4 *results %s)\n" + "{\n" + "%s" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + "%s" + " results[offset] = 
read_image%s( input, coords %s);\n" + "}"; const char *offset3DKernelSource = " int offset = tidZ*get_image_width(input)*get_image_height(input) + tidY*get_image_width(input) + tidX;\n"; @@ -137,15 +145,16 @@ int test_read_image_set_3D(cl_device_id device, cl_context context, KernelSourcePattern = read_write3DKernelSourcePattern; } - sprintf( programSrc, - KernelSourcePattern, - samplerArg, get_explicit_type_name( outputType ), - gTestMipmaps? ", float lod": " ", - samplerVar, - gTestMipmaps? offset3DLodKernelSource: offset3DKernelSource, - floatCoords ? float3DUnnormalizedCoordKernelSource : int3DCoordKernelSource, - readFormat, - gTestMipmaps? ",lod":" "); + sprintf(programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable" + : "", + samplerArg, get_explicit_type_name(outputType), + gTestMipmaps ? ", float lod" : " ", samplerVar, + gTestMipmaps ? offset3DLodKernelSource : offset3DKernelSource, + floatCoords ? float3DUnnormalizedCoordKernelSource + : int3DCoordKernelSource, + readFormat, gTestMipmaps ? 
",lod" : " "); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_write_1D.cpp b/test_conformance/images/kernel_read_write/test_write_1D.cpp index 1556a76a..5f726796 100644 --- a/test_conformance/images/kernel_read_write/test_write_1D.cpp +++ b/test_conformance/images/kernel_read_write/test_write_1D.cpp @@ -27,20 +27,24 @@ extern bool validate_float_write_results( float *expected, float *actual, image_ extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor* imageInfo ); const char *readwrite1DKernelSourcePattern = -"__kernel void sample_kernel( __global %s4 *input, read_write image1d_t output %s)\n" -"{\n" -" int tidX = get_global_id(0);\n" -" int offset = tidX;\n" -" write_image%s( output, tidX %s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, read_write image1d_t " + "output %s)\n" + "{\n" + " int tidX = get_global_id(0);\n" + " int offset = tidX;\n" + " write_image%s( output, tidX %s, input[ offset ]);\n" + "}"; const char *write1DKernelSourcePattern = -"__kernel void sample_kernel( __global %s4 *input, write_only image1d_t output %s)\n" -"{\n" -" int tidX = get_global_id(0);\n" -" int offset = tidX;\n" -" write_image%s( output, tidX %s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, write_only image1d_t " + "output %s)\n" + "{\n" + " int tidX = get_global_id(0);\n" + " int offset = tidX;\n" + " write_image%s( output, tidX %s, input[ offset ]);\n" + "}"; int test_write_image_1D( cl_device_id device, cl_context context, cl_command_queue queue, cl_kernel kernel, image_descriptor *imageInfo, ExplicitType inputType, MTdata d ) @@ -614,12 +618,14 @@ int test_write_image_1D_set(cl_device_id device, cl_context context, KernelSourcePattern = readwrite1DKernelSourcePattern; } - sprintf( programSrc, - KernelSourcePattern, - get_explicit_type_name( 
inputType ), - gTestMipmaps ? ", int lod" : "", - readFormat, - gTestMipmaps ? ", lod" :"" ); + sprintf( + programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma " + "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable" + : "", + get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "", + readFormat, gTestMipmaps ? ", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp index e9aa8d2a..f9024405 100644 --- a/test_conformance/images/kernel_read_write/test_write_1D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_write_1D_array.cpp @@ -27,20 +27,24 @@ extern bool validate_float_write_results( float *expected, float *actual, image_ extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo ); const char *readwrite1DArrayKernelSourcePattern = -"__kernel void sample_kernel( __global %s4 *input, read_write image1d_array_t output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -" write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, read_write " + "image1d_array_t output %s)\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + " write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ]);\n" + "}"; const char *write1DArrayKernelSourcePattern = -"__kernel void sample_kernel( __global %s4 *input, write_only image1d_array_t output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -" write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, write_only " + "image1d_array_t output %s)\n" + "{\n" + " int tidX 
= get_global_id(0), tidY = get_global_id(1);\n" + "%s" + " write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n" + "}"; const char *offset1DArraySource = " int offset = tidY*get_image_width(output) + tidX;\n"; @@ -637,13 +641,15 @@ int test_write_image_1D_array_set(cl_device_id device, cl_context context, } // Construct the source // Construct the source - sprintf( programSrc, - KernelSourcePattern, - get_explicit_type_name( inputType ), - gTestMipmaps ? ", int lod" : "", - gTestMipmaps ? offset1DArrayLodSource : offset1DArraySource, - readFormat, - gTestMipmaps ? ", lod" :"" ); + sprintf( + programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma " + "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable" + : "", + get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "", + gTestMipmaps ? offset1DArrayLodSource : offset1DArraySource, readFormat, + gTestMipmaps ? ", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp index 5bca7124..c1c56994 100644 --- a/test_conformance/images/kernel_read_write/test_write_2D_array.cpp +++ b/test_conformance/images/kernel_read_write/test_write_2D_array.cpp @@ -49,20 +49,28 @@ static size_t reduceImageDepth(size_t maxDepth) { } const char *write2DArrayKernelSourcePattern = -"__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s%s *input, write_only %s output " + "%s)\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + " write_image%s( output, 
(int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset " + "]);\n" + "}"; const char *readwrite2DArrayKernelSourcePattern = -"__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ] );\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s%s *input, read_write %s output " + "%s)\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset " + "] );\n" + "}"; const char *offset2DArrayKernelSource = " int offset = tidZ*get_image_width(output)*get_image_height(output) + tidY*get_image_width(output) + tidX;\n"; @@ -671,15 +679,19 @@ int test_write_image_2D_array_set(cl_device_id device, cl_context context, } // Construct the source // Construct the source - sprintf( programSrc, - KernelSourcePattern, - get_explicit_type_name( inputType ), - (format->image_channel_order == CL_DEPTH) ? "" : "4", - (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" : "image2d_array_t", - gTestMipmaps ? " , int lod" : "", - gTestMipmaps ? offset2DArrayLodKernelSource : offset2DArrayKernelSource, - readFormat, - gTestMipmaps ? ", lod" : "" ); + sprintf( + programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma " + "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable" + : "", + get_explicit_type_name(inputType), + (format->image_channel_order == CL_DEPTH) ? "" : "4", + (format->image_channel_order == CL_DEPTH) ? "image2d_array_depth_t" + : "image2d_array_t", + gTestMipmaps ? " , int lod" : "", + gTestMipmaps ? offset2DArrayLodKernelSource : offset2DArrayKernelSource, + readFormat, gTestMipmaps ? 
", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_write_3D.cpp b/test_conformance/images/kernel_read_write/test_write_3D.cpp index d9a69627..9da93695 100644 --- a/test_conformance/images/kernel_read_write/test_write_3D.cpp +++ b/test_conformance/images/kernel_read_write/test_write_3D.cpp @@ -46,22 +46,30 @@ static size_t reduceImageDepth(size_t maxDimSize, MTdata& seed) { const char *write3DKernelSourcePattern = -"%s" -"__kernel void sample_kernel( __global %s4 *input, write_only image3d_t output %s )\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n" -"}"; + "%s" + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, write_only image3d_t " + "output %s )\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset " + "]);\n" + "}"; const char *readwrite3DKernelSourcePattern = -"%s" -"__kernel void sample_kernel( __global %s4 *input, read_write image3d_t output %s )\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = get_global_id(2);\n" -"%s" -" write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset ]);\n" -"}"; + "%s" + "%s\n" + "__kernel void sample_kernel( __global %s4 *input, read_write image3d_t " + "output %s )\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1), tidZ = " + "get_global_id(2);\n" + "%s" + " write_image%s( output, (int4)( tidX, tidY, tidZ, 0 ) %s, input[ offset " + "]);\n" + "}"; const char *khr3DWritesPragma = "#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable\n"; @@ -678,14 +686,15 @@ int test_write_image_3D_set(cl_device_id device, cl_context context, } // Construct the source - sprintf( programSrc, - 
KernelSourcePattern, - gTestMipmaps ? "" : khr3DWritesPragma, - get_explicit_type_name( inputType ), - gTestMipmaps ? ", int lod" : "", - gTestMipmaps ? offset3DLodSource : offset3DSource, - readFormat, - gTestMipmaps ? ", lod" : "" ); + sprintf( + programSrc, KernelSourcePattern, khr3DWritesPragma, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma " + "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable" + : "", + get_explicit_type_name(inputType), gTestMipmaps ? ", int lod" : "", + gTestMipmaps ? offset3DLodSource : offset3DSource, readFormat, + gTestMipmaps ? ", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, diff --git a/test_conformance/images/kernel_read_write/test_write_image.cpp b/test_conformance/images/kernel_read_write/test_write_image.cpp index 9cc9698c..29626971 100644 --- a/test_conformance/images/kernel_read_write/test_write_image.cpp +++ b/test_conformance/images/kernel_read_write/test_write_image.cpp @@ -47,20 +47,24 @@ extern bool validate_float_write_results( float *expected, float *actual, image_ extern bool validate_half_write_results( cl_half *expected, cl_half *actual, image_descriptor *imageInfo ); const char *writeKernelSourcePattern = -"__kernel void sample_kernel( __global %s%s *input, write_only %s output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -" write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s%s *input, write_only %s output " + "%s)\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + " write_image%s( output, (int2)( tidX, tidY ) %s, input[ offset ]);\n" + "}"; const char *read_writeKernelSourcePattern = -"__kernel void sample_kernel( __global %s%s *input, read_write %s output %s)\n" -"{\n" -" int tidX = get_global_id(0), tidY = get_global_id(1);\n" -"%s" -" write_image%s( output, (int2)( tidX, tidY )%s, 
input[ offset ] );\n" -"}"; + "%s\n" + "__kernel void sample_kernel( __global %s%s *input, read_write %s output " + "%s)\n" + "{\n" + " int tidX = get_global_id(0), tidY = get_global_id(1);\n" + "%s" + " write_image%s( output, (int2)( tidX, tidY )%s, input[ offset ] );\n" + "}"; const char *offset2DKernelSource = " int offset = tidY*get_image_width(output) + tidX;\n"; @@ -728,15 +732,19 @@ int test_write_image_set(cl_device_id device, cl_context context, } // Construct the source - sprintf( programSrc, - KernelSourcePattern, - get_explicit_type_name( inputType ), - (format->image_channel_order == CL_DEPTH) ? "" : "4", - (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" : "image2d_t", - gTestMipmaps ? ", int lod" : "", - gTestMipmaps ? offset2DLodKernelSource : offset2DKernelSource, - readFormat, - gTestMipmaps ? ", lod" : "" ); + sprintf( + programSrc, KernelSourcePattern, + gTestMipmaps + ? "#pragma OPENCL EXTENSION cl_khr_mipmap_image: enable\n#pragma " + "OPENCL EXTENSION cl_khr_mipmap_image_writes: enable" + : "", + get_explicit_type_name(inputType), + (format->image_channel_order == CL_DEPTH) ? "" : "4", + (format->image_channel_order == CL_DEPTH) ? "image2d_depth_t" + : "image2d_t", + gTestMipmaps ? ", int lod" : "", + gTestMipmaps ? offset2DLodKernelSource : offset2DKernelSource, + readFormat, gTestMipmaps ? ", lod" : ""); ptr = programSrc; error = create_single_kernel_helper(context, &program, &kernel, 1, &ptr, -- cgit v1.2.3 From f32f1aeaa20d796210c4c7050695eb2062ddfba1 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 17 May 2022 16:54:39 +0100 Subject: Add content to README (#1427) Fill in the placeholder readme with some basic information on building and running the project. Information on the conformance submission process and contributing are also included. 
Should help close a few issues referenced in https://github.com/KhronosGroup/OpenCL-CTS/issues/1096 I don't think this is all the information we want, but is a starting point from which we can progress. For example, adding the android build instructions from https://github.com/KhronosGroup/OpenCL-CTS/pull/1021 --- README.md | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 115 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 796f7c86..3d410644 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,115 @@ -# OpenCL-CTS -The OpenCL Conformance Tests +# OpenCL Conformance Test Suite (CTS) + +This it the OpenCL CTS for all versions of the Khronos +[OpenCL](https://www.khronos.org/opencl/) standard. + +## Building the CTS + +The CTS supports Linux, Windows, macOS, and Android platforms. In particular, +GitHub Actions CI builds against Ubuntu 20.04, Windows-latest, and +macos-latest. + +Compiling the CTS requires the following CMake configuration options to be set: + +* `CL_INCLUDE_DIR` Points to the unified + [OpenCL-Headers](https://github.com/KhronosGroup/OpenCL-Headers). +* `CL_LIB_DIR` Directory containing the OpenCL library to build against. +* `OPENCL_LIBRARIES` Name of the OpenCL library to link. + +It is advised that the [OpenCL ICD-Loader](https://github.com/KhronosGroup/OpenCL-ICD-Loader) +is used as the OpenCL library to build against. Where `CL_LIB_DIR` points to a +build of the ICD loader and `OPENCL_LIBRARIES` is "OpenCL". + +### Example Build + +Steps on a Linux platform to clone dependencies from GitHub sources, configure +a build, and compile. 
+ +```sh +git clone https://github.com/KhronosGroup/OpenCL-CTS.git +git clone https://github.com/KhronosGroup/OpenCL-Headers.git +git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git + +mkdir OpenCL-ICD-Loader/build +cmake -S OpenCL-ICD-Loader -B OpenCL-ICD-Loader/build \ + -DOPENCL_ICD_LOADER_HEADERS_DIR=$PWD/OpenCL-Headers +cmake --build ./OpenCL-ICD-Loader/build --config Release + +mkdir OpenCL-CTS/build +cmake -S OpenCL-CTS -B OpenCL-CTS/build \ + -DCL_INCLUDE_DIR=$PWD/OpenCL-Headers \ + -DCL_LIB_DIR=$PWD/OpenCL-ICD-Loader/build \ + -DOPENCL_LIBRARIES=OpenCL +cmake --build OpenCL-CTS/build --config Release +``` + +## Running the CTS + +A build of the CTS contains multiple executables representing the directories in +the `test_conformance` folder. Each of these executables contains sub-tests, and +possibly smaller granularities of testing within the sub-tests. + +See the `--help` output on each executable for the list of sub-tests available, +as well as other options for configuring execution. + +If the OpenCL library built against is the ICD Loader, and the vendor library to +be tested is not registered in the +[default ICD Loader location](https://github.com/KhronosGroup/OpenCL-ICD-Loader#registering-icds) +then the [OCL_ICD_FILENAMES](https://github.com/KhronosGroup/OpenCL-ICD-Loader#table-of-debug-environment-variables) +environment variable will need to be set for the ICD Loader to detect the OpenCL +library to use at runtime. For example, to run the basic tests on a Linux +platform: + +```sh +OCL_ICD_FILENAMES=/path/to/vendor_lib.so ./test_basic +``` + +### Offline Compilation + +Testing OpenCL drivers which do not have a runtime compiler can be done by using +additional command line arguments provided by the test harness for tests which +require compilation, these are: + +* `--compilation-mode` Selects if OpenCL-C source code should be compiled using + an external tool before being passed on to the OpenCL driver in that form for + testing. 
Online is the default mode, but also accepts the values `spir-v`, and + `binary`. + +* `--compilation-cache-mode` Controls how the compiled OpenCL-C source code + should be cached on disk. + +* `--compilation-cache-path` Accepts a path to a directory where the compiled + binary cache should be stored on disk. + +* `--compilation-program` Accepts a path to an executable (default: + cl_offline_compiler) invoked by the test harness to perform offline + compilation of OpenCL-C source code. This executable must match the + [interface description](test_common/harness/cl_offline_compiler-interface.txt). + +## Generating a Conformance Report + +The Khronos [Conformance Process Document](https://members.khronos.org/document/dl/911) +details the steps required for a conformance submissions. +In this repository [opencl_conformance_tests_full.csv](test_conformance/submission_details_template.txt) +defines the full list of tests which must be run for conformance. The output log +of which must be included alongside a filled in +[submission details template](test_conformance/submission_details_template.txt). + +Utility script [run_conformance.py](test_conformance/run_conformance.py) can be +used to help generating the submission log, although it is not required. + +Git [tags](https://github.com/KhronosGroup/OpenCL-CTS/tags) are used to define +the version of the repository conformance submissions are made against. + +## Contributing + +Contributions are welcome to the project from Khronos members and non-members +alike via GitHub Pull Requests (PR). Alternatively, if you've found a bug or have +a questions please file an issue in the GitHub project. First time contributors +will be required to sign the Khronos Contributor License Agreement (CLA) before +their PR can be merged. + +PRs to the repository are required to be `clang-format` clean to pass CI. 
+Developers can either use the `git-clang-format` tool locally to verify this +before contributing, or update their PR based on the diff provided by a failing +CI job. -- cgit v1.2.3 From 3bf46004ef4f6308bc49b1e22b1c7824a7a0e626 Mon Sep 17 00:00:00 2001 From: paulfradgley <39525348+paulfradgley@users.noreply.github.com> Date: Tue, 31 May 2022 16:55:42 +0100 Subject: Fixes incorrect slice pitch calculation in clCopyImage 1Darray (#1258) The slice pitch/padding calculation assumed that the 'height' variable contained the pixel height of the image, which it doesn't for IMAGE1D_ARRAY. Fixes #1257 --- test_conformance/images/clCopyImage/test_copy_generic.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test_conformance/images/clCopyImage/test_copy_generic.cpp b/test_conformance/images/clCopyImage/test_copy_generic.cpp index bd935e7f..3bd1b6ef 100644 --- a/test_conformance/images/clCopyImage/test_copy_generic.cpp +++ b/test_conformance/images/clCopyImage/test_copy_generic.cpp @@ -228,6 +228,11 @@ cl_mem create_image( cl_context context, cl_command_queue queue, BufferOwningPtr } size_t mappedSlicePad = mappedSlice - (mappedRow * height); + // For 1Darray, the height variable actually contains the arraysize, + // so it can't be used for calculating the slice padding. + if (imageInfo->type == CL_MEM_OBJECT_IMAGE1D_ARRAY) + mappedSlicePad = mappedSlice - (mappedRow * 1); + // Copy the image. size_t scanlineSize = row_pitch_lod; size_t sliceSize = slice_pitch_lod - scanlineSize * height; -- cgit v1.2.3 From 7c65afc4e71b6b6b6023b598f9f675ebfeffc8a5 Mon Sep 17 00:00:00 2001 From: jansol Date: Tue, 7 Jun 2022 18:55:43 +0300 Subject: test_compiler_defines_for_extensions: fix overflow (#1430) GCC 11.2.0 warns about a possible string overflow (when num_not_supported_extensions+num_of_supported_extensions == 0) since no space would be allocated for the terminating null byte that string manipulation fns expect to find. 
This unconditionally adds an extra byte to the allocation to silence the warning and fix building with -Werror. --- .../compiler/test_compiler_defines_for_extensions.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 1519779a..84b7798f 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -322,8 +322,15 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context } // Build the kernel - char *kernel_code = (char*)malloc(1025*256*(num_not_supported_extensions+num_of_supported_extensions)); - memset(kernel_code, 0, 1025*256*(num_not_supported_extensions+num_of_supported_extensions)); + char *kernel_code = (char *)malloc( + 1 + + 1025 * 256 + * (num_not_supported_extensions + num_of_supported_extensions)); + memset( + kernel_code, 0, + 1 + + 1025 * 256 + * (num_not_supported_extensions + num_of_supported_extensions)); int i, index = 0; strcat(kernel_code, kernel_strings[0]); -- cgit v1.2.3 From c2aca7d8e6a6ec2162a1c68b127409aa9931974d Mon Sep 17 00:00:00 2001 From: paulfradgley <39525348+paulfradgley@users.noreply.github.com> Date: Tue, 14 Jun 2022 16:47:06 +0100 Subject: Fix local memory out of bounds issue in atomic_fence (replaces PR #1285) (#1437) * Fix local memory out of bounds in atomic_fence In the error condition, the atomic_fence kernel can illegally access local memory addresses. In this snippet, localValues is in the local address space and provided as a kernel argument. Its size is effectively get_local_size(0) * sizeof(int). The stores to localValues lead to OoB accesses. size_t myId = get_local_id(0); ... 
if(hisAtomicValue != hisValue) { // fail atomic_store(&destMemory[myId], myValue-1); hisId = (hisId+get_local_size(0)-1)%get_local_size(0); if(myValue+1 < 1) localValues[myId*1+myValue+1] = hisId; if(myValue+2 < 1) localValues[myId*1+myValue+2] = hisAtomicValue; if(myValue+3 < 1) localValues[myId*1+myValue+3] = hisValue; } * Fix formatting * Fix formatting again * Formatting --- test_conformance/c11_atomics/common.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 42fe32b6..5bb9e5b7 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -1360,8 +1360,10 @@ int CBasicTest::ExecuteSingleTest( { error = clSetKernelArg(kernel, argInd++, - LocalRefValues() ? typeSize * CurrentGroupSize() - * NumNonAtomicVariablesPerThread() + LocalRefValues() ? typeSize + * ((CurrentGroupSize() + * NumNonAtomicVariablesPerThread()) + + 4) : 1, NULL); test_error(error, "Unable to set indexed kernel argument"); -- cgit v1.2.3 From f1c051afb1484dca7cacdf66383f5e8a6e6bec32 Mon Sep 17 00:00:00 2001 From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com> Date: Tue, 14 Jun 2022 16:48:59 +0100 Subject: Added missing tests for integer_dot_product_input_4x8bit and integer_dot_product_input_4x8bit_packed on feature_macro compiler test. (#1432) * Added integer_dot_product_input_4x8bit and integer_dot_product_input_4x8bit_packed tests to feature_macro_test * clang formatting * Now the test checks whether the array of optional features returned by clGetDeviceInfo contains the standard optional features we are testing. * Update test_conformance/compiler/test_feature_macro.cpp Added printing the missing standard feature it it is not found inside the optional features array returned by clGetDeviceInfo. 
Co-authored-by: Ben Ashbaugh Co-authored-by: Ben Ashbaugh --- test_conformance/compiler/test_feature_macro.cpp | 98 +++++++++++++++++++++--- 1 file changed, 89 insertions(+), 9 deletions(-) diff --git a/test_conformance/compiler/test_feature_macro.cpp b/test_conformance/compiler/test_feature_macro.cpp index ac355dd4..ef3c0028 100644 --- a/test_conformance/compiler/test_feature_macro.cpp +++ b/test_conformance/compiler/test_feature_macro.cpp @@ -579,6 +579,78 @@ int test_feature_macro_fp64(cl_device_id deviceID, cl_context context, compiler_status, supported); } +int test_feature_macro_integer_dot_product_input_4x8bit_packed( + cl_device_id deviceID, cl_context context, std::string test_macro_name, + cl_bool& supported) +{ + cl_int error = TEST_FAIL; + cl_bool api_status; + cl_bool compiler_status; + log_info("\n%s ...\n", test_macro_name.c_str()); + + if (!is_extension_available(deviceID, "cl_khr_integer_dot_product")) + { + supported = false; + return TEST_PASS; + } + + error = check_api_feature_info_capabilities< + cl_device_integer_dot_product_capabilities_khr>( + deviceID, context, api_status, + CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, + CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_PACKED_KHR); + if (error != CL_SUCCESS) + { + return error; + } + + error = check_compiler_feature_info(deviceID, context, test_macro_name, + compiler_status); + if (error != CL_SUCCESS) + { + return error; + } + + return feature_macro_verify_results(test_macro_name, api_status, + compiler_status, supported); +} + +int test_feature_macro_integer_dot_product_input_4x8bit( + cl_device_id deviceID, cl_context context, std::string test_macro_name, + cl_bool& supported) +{ + cl_int error = TEST_FAIL; + cl_bool api_status; + cl_bool compiler_status; + log_info("\n%s ...\n", test_macro_name.c_str()); + + if (!is_extension_available(deviceID, "cl_khr_integer_dot_product")) + { + supported = false; + return TEST_PASS; + } + + error = check_api_feature_info_capabilities< + 
cl_device_integer_dot_product_capabilities_khr>( + deviceID, context, api_status, + CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, + CL_DEVICE_INTEGER_DOT_PRODUCT_INPUT_4x8BIT_KHR); + if (error != CL_SUCCESS) + { + return error; + } + + error = check_compiler_feature_info(deviceID, context, test_macro_name, + compiler_status); + if (error != CL_SUCCESS) + { + return error; + } + + return feature_macro_verify_results(test_macro_name, api_status, + compiler_status, supported); +} + int test_feature_macro_int64(cl_device_id deviceID, cl_context context, std::string test_macro_name, cl_bool& supported) { @@ -686,15 +758,6 @@ int test_consistency_c_features_list(cl_device_id deviceID, sort(vec_to_cmp.begin(), vec_to_cmp.end()); sort(vec_device_feature_names.begin(), vec_device_feature_names.end()); - if (vec_device_feature_names == vec_to_cmp) - { - log_info("Comparison list of features - passed\n"); - } - else - { - log_info("Comparison list of features - failed\n"); - error = TEST_FAIL; - } log_info( "Supported features based on CL_DEVICE_OPENCL_C_FEATURES API query:\n"); for (auto each_f : vec_device_feature_names) @@ -703,11 +766,26 @@ int test_consistency_c_features_list(cl_device_id deviceID, } log_info("\nSupported features based on queries to API/compiler :\n"); + for (auto each_f : vec_to_cmp) { log_info("%s\n", each_f.c_str()); } + for (auto each_f : vec_to_cmp) + { + if (find(vec_device_feature_names.begin(), + vec_device_feature_names.end(), each_f) + == vec_device_feature_names.end()) + { + log_info("Comparison list of features - failed - missing %s\n", + each_f.c_str()); + return TEST_FAIL; + } + } + + log_info("Comparison list of features - passed\n"); + return error; } @@ -748,6 +826,8 @@ int test_features_macro(cl_device_id deviceID, cl_context context, NEW_FEATURE_MACRO_TEST(images); NEW_FEATURE_MACRO_TEST(fp64); NEW_FEATURE_MACRO_TEST(int64); + NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit); + 
NEW_FEATURE_MACRO_TEST(integer_dot_product_input_4x8bit_packed); error |= test_consistency_c_features_list(deviceID, supported_features_vec); -- cgit v1.2.3 From 67ac6c8d2d1b2e8ee9d6b775be459759ec301bf9 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Tue, 14 Jun 2022 23:51:39 +0800 Subject: Fix test_half async_work_group_copy arguments (#1298) (#1299) Workitems in the last workgroup calls async_work_group_copy with different argument values depending on 'adjust'. According to spec, this results in undefined values. --- test_conformance/half/Test_vStoreHalf.cpp | 35 ++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index 85824a9f..3ca5920b 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -422,7 +422,9 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR "__kernel void test( __global float *p, __global half *f,\n" " uint extra_last_thread )\n" "{\n" - " __local ushort data[3*(", local_buf_size, "+1)];\n" + " __local ushort data[3*(", + local_buf_size, + "+1)];\n" " size_t i = get_global_id(0);\n" " size_t lid = get_local_id(0);\n" " size_t last_i = get_global_size(0)-1;\n" @@ -432,9 +434,18 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR " if(last_i == i && extra_last_thread != 0) {\n" " adjust = 3-extra_last_thread;\n" " } " - " vstore_half3",roundName,"( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" + " vstore_half3", + roundName, + "( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" " barrier( CLK_LOCAL_MEM_FENCE ); \n" - " async_event = async_work_group_copy((__global ushort *)(f+3*(i-lid)), (__local ushort *)(&data[adjust]), lsize*3-adjust, 0);\n" // investigate later + " if (get_group_id(0) == (get_num_groups(0) - 1) &&\n" + " extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + 
" }\n" + " async_event = async_work_group_copy(\n" + " (__global ushort*)(f+3*(i-lid)),\n" + " (__local ushort *)(&data[adjust]),\n" + " lsize*3-adjust, 0);\n" // investigate later " wait_group_events(1, &async_event);\n" "}\n" }; @@ -524,7 +535,9 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR "__kernel void test( __global double *p, __global half *f,\n" " uint extra_last_thread )\n" "{\n" - " __local ushort data[3*(", local_buf_size, "+1)];\n" + " __local ushort data[3*(", + local_buf_size, + "+1)];\n" " size_t i = get_global_id(0);\n" " size_t lid = get_local_id(0);\n" " size_t last_i = get_global_size(0)-1;\n" @@ -534,15 +547,23 @@ int Test_vStoreHalf_private( cl_device_id device, f2h referenceFunc, d2h doubleR " if(last_i == i && extra_last_thread != 0) {\n" " adjust = 3-extra_last_thread;\n" " }\n " - " vstore_half3",roundName,"( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" + " vstore_half3", + roundName, + "( vload3(i,p-adjust), lid, (__local half *)(&data[0]) );\n" " barrier( CLK_LOCAL_MEM_FENCE ); \n" - " async_event = async_work_group_copy((__global ushort *)(f+3*(i-lid)), (__local ushort *)(&data[adjust]), lsize*3-adjust, 0);\n" // investigate later + " if (get_group_id(0) == (get_num_groups(0) - 1) &&\n" + " extra_last_thread != 0) {\n" + " adjust = 3-extra_last_thread;\n" + " }\n" + " async_event = async_work_group_copy(\n" + " (__global ushort *)(f+3*(i-lid)),\n" + " (__local ushort *)(&data[adjust]),\n" + " lsize*3-adjust, 0);\n" // investigate later " wait_group_events(1, &async_event);\n" "}\n" }; - if(g_arrVecSizes[vectorSize] == 3) { programs[vectorSize][0] = MakeProgram( device, source_v3, sizeof(source_v3) / sizeof( source_v3[0]) ); } else { -- cgit v1.2.3 From 0b7118186af0f146dd044909c677bed7869c1363 Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 21 Jun 2022 21:51:47 +0530 Subject: Initial CTS for external semaphore and memory extensions (#1390) * Initial CTS for external sharing 
extensions Initial set of tests for below extensions with Vulkan as producer 1. cl_khr_external_memory 2. cl_khr_external_memory_win32 3. cl_khr_external_memory_opaque_fd 4. cl_khr_external_semaphore 5. cl_khr_external_semaphore_win32 6. cl_khr_external_semaphore_opaque_fd * Updates to external sharing CTS Updates to external sharing CTS 1. Fix some build issues to remove unnecessary, non-existent files 2. Add new tests for platform and device queries. 3. Some added checks for VK Support. * Update CTS build script for Vulkan Headers Update CTS build to clone Vulkan Headers repo and pass it to CTS build in preparation for external memory and semaphore tests * Fix Vulkan header path Fix Vulkan header include path. * Add Vulkan loader dependency Vulkan loader is required to build test_vulkan of OpenCL-CTS. Clone and build Vulkan loader as prerequisite to OpenCL-CTS. * Fix Vulkan loader path in test_vulkan Remove arch/os suffix in Vulkan loader path to match vulkan loader repo build. * Fix warnings around getHandle API. Return type of getHandle is defined differently based on win or linux builds. Use appropriate guards when using API at other places. While at it remove duplicate definition of ARRAY_SIZE. * Use ARRAY_SIZE in harness. Use already defined ARRAY_SIZE macro from test_harness. * Fix build issues for test_vulkan Fix build issues for test_vulkan 1. Add cl_ext.h in common files 2. Replace cl_mem_properties_khr with cl_mem_properties 3. Replace cl_external_mem_handle_type_khr with cl_external_memory_handle_type_khr 4. Type-cast malloc as required. * Fix code formatting. Fix code formatting to get CTS CI builds clean. * Fix formatting fixes part-2 Another set of formatting fixes. * Fix code formatting part-3 Some more code formatting fixes. * Fix code formatting issues part-4 More code formatting fixes. * Formatting fixes part-5 Some more formatting fixes * Fix formatting part-6 More formatting fixes continued. 
* Code formatting fixes part-7 Code formatting fixes for image * Code formatting fixes part-8 Fixes for platform and device query tests. * Code formatting fixes part-9 More formatting fixes for vulkan_wrapper * Code formatting fixes part-10 More fixes to wrapper header * Code formatting fixes part-11 Formatting fixes for api_list * Code formatting fixes part-12 Formatting fixes for api_list_map. * Code formatting changes part-13 Code formatting changes for utility. * Code formatting fixes part-15 Formatting fixes for wrapper. * Misc Code formatting fixes Some more misc code formatting fixes. * Fix build breaks due to code formatting Fix build issues arised with recent code formatting issues. * Fix presubmit script after merge Fix presubmit script after merge conflicts. * Fix Vulkan loader build in presubmit script. Use cmake ninja and appropriate toolchain for Vulkan loader dependency to fix linking issue on arm/aarch64. * Use static array sizes Use static array sizes to fix windows builds. * Some left-out formatting fixes. Fix remaining formatting issues. * Fix harness header path Fix harness header path While at it, remove Misc and test pragma. * Add/Fix license information Add Khronos License info for test_vulkan. Replace Apple license with Khronos as applicable. * Fix headers for Mac OSX builds. Use appropriate headers for Mac OSX builds * Fix Mac OSX builds. Use appropriate headers for Mac OSX builds. Also, fix some build issues due to type-casting. * Fix new code formatting issues Fix new code formatting issues with recent MacOS fixes. * Add back missing case statement Add back missing case statement that was accidentally removed. * Disable USE_GAS for Vulkan Loader build. Disable USE_GAS for Vulkan Loader build to fix aarch64 build. * Update Copyright Year. Update Copyright Year to 2022 for external memory sharing tests. * Android specific fixes Android specific fixes to external sharing tests. 
--- presubmit.sh | 18 +- test_common/harness/kernelHelpers.cpp | 12 +- test_conformance/CMakeLists.txt | 1 + test_conformance/subgroups/subhelpers.h | 1 + test_conformance/vulkan/CMakeLists.txt | 50 + test_conformance/vulkan/main.cpp | 344 ++++ test_conformance/vulkan/procs.h | 38 + .../vulkan/test_vulkan_api_consistency.cpp | 568 ++++++ .../vulkan/test_vulkan_interop_buffer.cpp | 1808 +++++++++++++++++ .../vulkan/test_vulkan_interop_image.cpp | 1648 ++++++++++++++++ .../vulkan/test_vulkan_platform_device_info.cpp | 146 ++ .../opencl_vulkan_wrapper.cpp | 818 ++++++++ .../opencl_vulkan_wrapper.hpp | 129 ++ .../vulkan_interop_common/vulkan_api_list.hpp | 195 ++ .../vulkan_interop_common.cpp | 22 + .../vulkan_interop_common.hpp | 50 + .../vulkan_interop_common/vulkan_list_map.cpp | 424 ++++ .../vulkan_interop_common/vulkan_list_map.hpp | 389 ++++ .../vulkan_interop_common/vulkan_utility.cpp | 693 +++++++ .../vulkan_interop_common/vulkan_utility.hpp | 69 + .../vulkan_interop_common/vulkan_wrapper.cpp | 2075 ++++++++++++++++++++ .../vulkan_interop_common/vulkan_wrapper.hpp | 579 ++++++ .../vulkan_interop_common/vulkan_wrapper_types.hpp | 463 +++++ 23 files changed, 10535 insertions(+), 5 deletions(-) create mode 100644 test_conformance/vulkan/CMakeLists.txt create mode 100644 test_conformance/vulkan/main.cpp create mode 100644 test_conformance/vulkan/procs.h create mode 100644 test_conformance/vulkan/test_vulkan_api_consistency.cpp create mode 100644 test_conformance/vulkan/test_vulkan_interop_buffer.cpp create mode 100644 test_conformance/vulkan/test_vulkan_interop_image.cpp create mode 100644 test_conformance/vulkan/test_vulkan_platform_device_info.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_api_list.hpp create mode 100644 
test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp create mode 100644 test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp diff --git a/presubmit.sh b/presubmit.sh index b63a4373..6c3a293e 100755 --- a/presubmit.sh +++ b/presubmit.sh @@ -40,6 +40,9 @@ if [[ ( ${JOB_ARCHITECTURE} == "" && ${JOB_ENABLE_GL} == "1" ) ]]; then BUILD_OPENGL_TEST="ON" fi +#Vulkan Headers +git clone https://github.com/KhronosGroup/Vulkan-Headers.git + # Get and build loader git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git cd ${TOP}/OpenCL-ICD-Loader @@ -48,6 +51,16 @@ cd build cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ cmake --build . -j2 --config Release +#Vulkan Loader +cd ${TOP} +git clone https://github.com/KhronosGroup/Vulkan-Loader.git +cd Vulkan-Loader +mkdir build +cd build +python3 ../scripts/update_deps.py +cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DBUILD_WSI_XLIB_SUPPORT=OFF -DBUILD_WSI_XCB_SUPPORT=OFF -DBUILD_WSI_WAYLAND_SUPPORT=OFF -DUSE_GAS=OFF -C helper.cmake .. +cmake --build . -j2 --config Release + # Build CTS cd ${TOP} ls -l @@ -68,6 +81,9 @@ cmake .. 
-G Ninja \ -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=./bin \ -DOPENCL_LIBRARIES="${CMAKE_OPENCL_LIBRARIES_OPTION}" \ -DUSE_CL_EXPERIMENTAL=ON \ - -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} + -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} \ + -DVULKAN_INCLUDE_DIR=${TOP}/Vulkan-Headers/include/ \ + -DVULKAN_LIB_DIR=${TOP}/Vulkan-Loader/build/loader/ cmake --build . -j3 --config Release + diff --git a/test_common/harness/kernelHelpers.cpp b/test_common/harness/kernelHelpers.cpp index 1d1f8d8c..13ebcbc9 100644 --- a/test_common/harness/kernelHelpers.cpp +++ b/test_common/harness/kernelHelpers.cpp @@ -1661,8 +1661,10 @@ Version get_device_latest_cl_c_version(cl_device_id device) Version max_supported_cl_c_version{}; for (const auto &name_version : name_versions) { - Version current_version{ CL_VERSION_MAJOR(name_version.version), - CL_VERSION_MINOR(name_version.version) }; + Version current_version{ + static_cast(CL_VERSION_MAJOR(name_version.version)), + static_cast(CL_VERSION_MINOR(name_version.version)) + }; max_supported_cl_c_version = (current_version > max_supported_cl_c_version) ? 
current_version @@ -1745,8 +1747,10 @@ bool device_supports_cl_c_version(cl_device_id device, Version version) for (const auto &name_version : name_versions) { - Version current_version{ CL_VERSION_MAJOR(name_version.version), - CL_VERSION_MINOR(name_version.version) }; + Version current_version{ + static_cast(CL_VERSION_MAJOR(name_version.version)), + static_cast(CL_VERSION_MINOR(name_version.version)) + }; if (current_version == version) { return true; diff --git a/test_conformance/CMakeLists.txt b/test_conformance/CMakeLists.txt index 363ece86..f9514f1e 100644 --- a/test_conformance/CMakeLists.txt +++ b/test_conformance/CMakeLists.txt @@ -52,6 +52,7 @@ add_subdirectory( pipes ) add_subdirectory( device_timer ) add_subdirectory( spirv_new ) add_subdirectory( spir ) +add_subdirectory( vulkan ) file(GLOB CSV_FILES "opencl_conformance_tests_*.csv") diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index c73027dc..12704db8 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -55,6 +55,7 @@ static cl_uint4 bs128_to_cl_uint4(bs128 v) struct WorkGroupParams { + WorkGroupParams(size_t gws, size_t lws, int dm_arg = -1, int cs_arg = -1) : global_workgroup_size(gws), local_workgroup_size(lws), divergence_mask_arg(dm_arg), cluster_size_arg(cs_arg) diff --git a/test_conformance/vulkan/CMakeLists.txt b/test_conformance/vulkan/CMakeLists.txt new file mode 100644 index 00000000..4f43172a --- /dev/null +++ b/test_conformance/vulkan/CMakeLists.txt @@ -0,0 +1,50 @@ +set (MODULE_NAME VULKAN) + +if(WIN32) + list(APPEND CLConform_LIBRARIES vulkan-1) +else(WIN32) + list(APPEND CLConform_LIBRARIES vulkan dl) +endif(WIN32) +set(CMAKE_CXX_FLAGS "-fpermissive") +if(WIN32) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DVK_USE_PLATFORM_WIN32_KHR") +endif(WIN32) + +set (CLConform_VULKAN_LIBRARIES_DIR "${VULKAN_LIB_DIR}") + +link_directories(${CLConform_VULKAN_LIBRARIES_DIR}) + +list(APPEND 
CLConform_INCLUDE_DIR ${VULKAN_INCLUDE_DIR}) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +include_directories (${CLConform_INCLUDE_DIR}) + +set (${MODULE_NAME}_SOURCES + main.cpp + test_vulkan_interop_buffer.cpp + test_vulkan_interop_image.cpp + test_vulkan_api_consistency.cpp + test_vulkan_platform_device_info.cpp + vulkan_interop_common/vulkan_wrapper.cpp + vulkan_interop_common/vulkan_interop_common.cpp + vulkan_interop_common/opencl_vulkan_wrapper.cpp + vulkan_interop_common/vulkan_utility.cpp + vulkan_interop_common/vulkan_list_map.cpp + ../../test_common/harness/genericThread.cpp + ../../test_common/harness/errorHelpers.cpp + ../../test_common/harness/testHarness.cpp + ../../test_common/harness/kernelHelpers.cpp + ../../test_common/harness/mt19937.cpp + ../../test_common/harness/msvc9.c + ../../test_common/harness/parseParameters.cpp + ../../test_common/harness/deviceInfo.cpp + ../../test_common/harness/crc32.cpp + ) + +set_source_files_properties( + ${${MODULE_NAME}_SOURCES} + PROPERTIES LANGUAGE CXX) +include_directories("./vulkan_interop_common/") + +include(../CMakeCommon.txt) diff --git a/test_conformance/vulkan/main.cpp b/test_conformance/vulkan/main.cpp new file mode 100644 index 00000000..6cbde5cc --- /dev/null +++ b/test_conformance/vulkan/main.cpp @@ -0,0 +1,344 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include + +#if !defined(_WIN32) +#include +#endif + +#include +#include + +#if !defined(__APPLE__) +#include +#else +#include +#endif + + +#include "procs.h" +#include "harness/testHarness.h" +#include "harness/parseParameters.h" +#include "harness/deviceInfo.h" + +#if !defined(_WIN32) +#include +#endif +#include +#include + +#define BUFFERSIZE 3000 + +static void params_reset() +{ + numCQ = 1; + multiImport = false; + multiCtx = false; +} + +extern int test_buffer_common(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_); +extern int test_image_common(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_); + +int test_buffer_single_queue(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_); +} +int test_buffer_multiple_queue(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + numCQ = 2; + log_info("RUNNING TEST WITH TWO QUEUE...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_); +} +int test_buffer_multiImport_sameCtx(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + multiImport = true; + log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " + "IN SAME CONTEXT...... \n\n"); + return test_buffer_common(device_, context_, queue_, numElements_); +} +int test_buffer_multiImport_diffCtx(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + multiImport = true; + multiCtx = true; + log_info("RUNNING TEST WITH MULTIPLE DEVICE MEMORY IMPORT " + "IN DIFFERENT CONTEXT...... 
\n\n"); + return test_buffer_common(device_, context_, queue_, numElements_); +} +int test_image_single_queue(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); + return test_image_common(device_, context_, queue_, numElements_); +} +int test_image_multiple_queue(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + params_reset(); + numCQ = 2; + log_info("RUNNING TEST WITH TWO QUEUE...... \n\n"); + return test_image_common(device_, context_, queue_, numElements_); +} + +test_definition test_list[] = { ADD_TEST(buffer_single_queue), + ADD_TEST(buffer_multiple_queue), + ADD_TEST(buffer_multiImport_sameCtx), + ADD_TEST(buffer_multiImport_diffCtx), + ADD_TEST(image_single_queue), + ADD_TEST(image_multiple_queue), + ADD_TEST(consistency_external_buffer), + ADD_TEST(consistency_external_image), + ADD_TEST(consistency_external_semaphore), + ADD_TEST(platform_info), + ADD_TEST(device_info) }; + +const int test_num = ARRAY_SIZE(test_list); + +cl_device_type gDeviceType = CL_DEVICE_TYPE_DEFAULT; +char *choosen_platform_name = NULL; +cl_platform_id platform = NULL; +cl_int choosen_platform_index = -1; +char platform_name[1024] = ""; +cl_platform_id select_platform = NULL; +char *extensions = NULL; +size_t extensionSize = 0; +cl_uint num_devices = 0; +cl_uint device_no = 0; +cl_device_id *devices; +const size_t bufsize = BUFFERSIZE; +char buf[BUFFERSIZE]; +cl_uchar uuid[CL_UUID_SIZE_KHR]; +VulkanDevice vkDevice; +unsigned int numCQ; +bool multiImport; +bool multiCtx; +bool debug_trace = false; +bool useSingleImageKernel = false; +bool useDeviceLocal = false; +bool disableNTHandleType = false; +bool enableOffset = false; +bool non_dedicated = false; + +static void printUsage(const char *execName) +{ + const char *p = strrchr(execName, '/'); + if (p != NULL) execName = p + 1; + + log_info("Usage: %s [test_names] [options]\n", 
execName); + log_info("Test names:\n"); + for (int i = 0; i < test_num; i++) + { + log_info("\t%s\n", test_list[i].name); + } + log_info("\n"); + log_info("Options:\n"); + log_info("\t--debug_trace - Enables additional debug info logging\n"); + log_info("\t--non_dedicated - Choose dedicated Vs. non_dedicated \n"); +} + +size_t parseParams(int argc, const char *argv[], const char **argList) +{ + size_t argCount = 1; + for (int i = 1; i < argc; i++) + { + if (argv[i] == NULL) break; + if (argv[i][0] == '-') + { + if (!strcmp(argv[i], "--debug_trace")) + { + debug_trace = true; + } + if (!strcmp(argv[i], "--useSingleImageKernel")) + { + useSingleImageKernel = true; + } + if (!strcmp(argv[i], "--useDeviceLocal")) + { + useDeviceLocal = true; + } + if (!strcmp(argv[i], "--disableNTHandleType")) + { + disableNTHandleType = true; + } + if (!strcmp(argv[i], "--enableOffset")) + { + enableOffset = true; + } + if (!strcmp(argv[i], "--non_dedicated")) + { + non_dedicated = true; + } + if (strcmp(argv[i], "-h") == 0) + { + printUsage(argv[0]); + argCount = 0; // Returning argCount=0 to assert error in main() + break; + } + } + else + { + argList[argCount] = argv[i]; + argCount++; + } + } + return argCount; +} + +int main(int argc, const char *argv[]) +{ + int errNum = 0; + + test_start(); + params_reset(); + + if (!checkVkSupport()) + { + log_info("Vulkan supported GPU not found \n"); + return 0; + } + + cl_device_type requestedDeviceType = CL_DEVICE_TYPE_GPU; + char *force_cpu = getenv("CL_DEVICE_TYPE"); + if (force_cpu != NULL) + { + if (strcmp(force_cpu, "gpu") == 0 + || strcmp(force_cpu, "CL_DEVICE_TYPE_GPU") == 0) + requestedDeviceType = CL_DEVICE_TYPE_GPU; + else if (strcmp(force_cpu, "cpu") == 0 + || strcmp(force_cpu, "CL_DEVICE_TYPE_CPU") == 0) + requestedDeviceType = CL_DEVICE_TYPE_CPU; + else if (strcmp(force_cpu, "accelerator") == 0 + || strcmp(force_cpu, "CL_DEVICE_TYPE_ACCELERATOR") == 0) + requestedDeviceType = CL_DEVICE_TYPE_ACCELERATOR; + else if 
(strcmp(force_cpu, "CL_DEVICE_TYPE_DEFAULT") == 0) + requestedDeviceType = CL_DEVICE_TYPE_DEFAULT; + } + + if (requestedDeviceType != CL_DEVICE_TYPE_GPU) + { + log_info("Vulkan tests can only run on a GPU device.\n"); + return 0; + } + gDeviceType = CL_DEVICE_TYPE_GPU; + + const char **argList = (const char **)calloc(argc, sizeof(char *)); + size_t argCount = parseParams(argc, argv, argList); + if (argCount == 0) return 0; + // get the platform ID + errNum = clGetPlatformIDs(1, &platform, NULL); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "Error: Failed to get platform\n"); + return errNum; + } + + errNum = + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "clGetDeviceIDs failed in returning of devices\n"); + return errNum; + } + devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id)); + if (NULL == devices) + { + print_error(errNum, "Unable to allocate memory for devices\n"); + return CL_OUT_OF_HOST_MEMORY; + } + errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, + NULL); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "Failed to get deviceID.\n"); + return errNum; + } + for (device_no = 0; device_no < num_devices; device_no++) + { + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, + NULL, &extensionSize); + if (CL_SUCCESS != errNum) + { + log_error("Error in clGetDeviceInfo for getting " + "device_extension size....\n"); + return errNum; + } + extensions = (char *)malloc(extensionSize); + if (NULL == extensions) + { + log_error("Unable to allocate memory for extensions\n"); + return CL_OUT_OF_HOST_MEMORY; + } + errNum = + clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, + extensionSize, extensions, NULL /*&extensionSize*/); + if (CL_SUCCESS != errNum) + { + print_error(errNum, + "Error in clGetDeviceInfo for getting " + "device_extension\n"); + return errNum; + } + errNum = 
clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR, + CL_UUID_SIZE_KHR, uuid, &extensionSize); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "clGetDeviceInfo failed with error\n "); + return errNum; + } + errNum = + memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE); + if (errNum == 0) + { + break; + } + } + if (device_no >= num_devices) + { + fprintf(stderr, + "OpenCL error: " + "No Vulkan-OpenCL Interop capable GPU found.\n"); + } + if (!(is_extension_available(devices[device_no], "cl_khr_external_memory") + && is_extension_available(devices[device_no], + "cl_khr_external_semaphore"))) + { + log_info("Device does not support cl_khr_external_memory " + "or cl_khr_external_semaphore\n"); + log_info(" TEST SKIPPED\n"); + return CL_SUCCESS; + } + init_cl_vk_ext(platform); + + // Execute tests. + // Note: don't use the entire harness, because we have a different way of + // obtaining the device (via the context) + errNum = parseAndCallCommandLineTests(argCount, argList, devices[device_no], + test_num, test_list, true, 0, 1024); + return errNum; +} diff --git a/test_conformance/vulkan/procs.h b/test_conformance/vulkan/procs.h new file mode 100644 index 00000000..37bf7869 --- /dev/null +++ b/test_conformance/vulkan/procs.h @@ -0,0 +1,38 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "harness/mt19937.h" + +extern int test_vulkan_interop_buffer(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_vulkan_interop_image(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_consistency_external_buffer(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_consistency_external_image(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_consistency_external_semaphore(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_platform_info(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_device_info(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp new file mode 100644 index 00000000..2987418f --- /dev/null +++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp @@ -0,0 +1,568 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include +#include +#if !defined(__APPLE__) +#include +#include +#else +#include +#include +#endif + +#include +#include +#include +#include +#include "harness/testHarness.h" +#include "harness/typeWrappers.h" +#include "harness/deviceInfo.h" + +int test_consistency_external_buffer(cl_device_id deviceID, cl_context _context, + cl_command_queue _queue, int num_elements) +{ + cl_int errNum; + VulkanDevice vkDevice; + // Context and command queue creation + cl_platform_id platform = NULL; + cl_context context = NULL; + cl_command_queue cmd_queue = NULL; + + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 }; + errNum = clGetPlatformIDs(1, &platform, NULL); + test_error(errNum, "Failed to get platform Id"); + + contextProperties[1] = (cl_context_properties)platform; + + context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, + NULL, NULL, &errNum); + test_error(errNum, "Unable to create context with properties"); + + cmd_queue = clCreateCommandQueue(context, deviceID, 0, &errNum); + test_error(errNum, "Unable to create command queue"); + + uint32_t bufferSize = 32; + cl_device_id devList[] = { deviceID, NULL }; + +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_memory_win32")) + { + throw std::runtime_error("Device does not support " + "cl_khr_external_memory_win32 extension \n"); + } +#else + if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd")) + { + throw std::runtime_error( + "Device does not support " + "cl_khr_external_memory_opaque_fd extension \n"); + } +#endif + + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + getSupportedVulkanExternalMemoryHandleTypeList()[0]; + + VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024, vkExternalMemoryHandleType); + const VulkanMemoryTypeList& memoryTypeList = + vkDummyBuffer.getMemoryTypeList(); + + VulkanDeviceMemory* vkDeviceMem = new VulkanDeviceMemory( + vkDevice, bufferSize, memoryTypeList[0], 
vkExternalMemoryHandleType); + VulkanBufferList vkBufferList(1, vkDevice, bufferSize, + vkExternalMemoryHandleType); + + vkDeviceMem->bindBuffer(vkBufferList[0], 0); + + void* handle = NULL; + int fd; + + std::vector extMemProperties{ + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties)devList[0], + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR, + }; + cl_external_memory_handle_type_khr type; + switch (vkExternalMemoryHandleType) + { +#ifdef _WIN32 + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: + handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType); + type = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR; + errNum = check_external_memory_handle_type(devList[0], type); + extMemProperties.push_back((cl_mem_properties)type); + extMemProperties.push_back((cl_mem_properties)handle); + break; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: + handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType); + type = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR; + errNum = check_external_memory_handle_type(devList[0], type); + extMemProperties.push_back((cl_mem_properties)type); + extMemProperties.push_back((cl_mem_properties)handle); + break; +#else + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: + fd = (int)vkDeviceMem->getHandle(vkExternalMemoryHandleType); + type = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR; + errNum = check_external_memory_handle_type(devList[0], type); + extMemProperties.push_back((cl_mem_properties)type); + extMemProperties.push_back((cl_mem_properties)fd); + break; +#endif + default: + errNum = TEST_FAIL; + log_error("Unsupported external memory handle type \n"); + break; + } + if (errNum != CL_SUCCESS) + { + log_error("Checks failed for " + "CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR\n"); + return TEST_FAIL; + } + extMemProperties.push_back(0); + + clMemWrapper buffer; + + // Passing NULL properties and a valid extMem_desc size + buffer = clCreateBufferWithProperties(context, NULL, 1, 
bufferSize, NULL, + &errNum); + test_error(errNum, "Unable to create buffer with NULL properties"); + + buffer.reset(); + + // Passing valid extMemProperties and buffersize + buffer = clCreateBufferWithProperties(context, extMemProperties.data(), 1, + bufferSize, NULL, &errNum); + test_error(errNum, "Unable to create buffer with Properties"); + + buffer.reset(); + + // Not passing external memory handle + std::vector extMemProperties2{ +#ifdef _WIN32 + (cl_mem_properties)type, + NULL, // Passing NULL handle +#else + (cl_mem_properties)type, + (cl_mem_properties)-64, // Passing random invalid fd +#endif + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties)devList[0], + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR, + 0 + }; + buffer = clCreateBufferWithProperties(context, extMemProperties2.data(), 1, + bufferSize, NULL, &errNum); + test_failure_error(errNum, CL_INVALID_VALUE, + "Should return CL_INVALID_VALUE "); + + buffer.reset(); + + // Passing extMem_desc size = 0 but valid memProperties, CL_INVALID_SIZE + // should be returned. 
+ buffer = clCreateBufferWithProperties(context, extMemProperties.data(), 1, + 0, NULL, &errNum); + test_failure_error(errNum, CL_INVALID_BUFFER_SIZE, + "Should return CL_INVALID_BUFFER_SIZE"); + + return TEST_PASS; +} + +int test_consistency_external_image(cl_device_id deviceID, cl_context _context, + cl_command_queue _queue, int num_elements) +{ + cl_int errNum; + VulkanDevice vkDevice; + + // Context and command queue creation + cl_platform_id platform = NULL; + cl_context context = NULL; + cl_command_queue cmd_queue = NULL; + + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 }; + errNum = clGetPlatformIDs(1, &platform, NULL); + test_error(errNum, "Failed to get platform id"); + + contextProperties[1] = (cl_context_properties)platform; + + context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, + NULL, NULL, &errNum); + test_error(errNum, "Unable to create context with properties"); + + cmd_queue = clCreateCommandQueue(context, deviceID, 0, &errNum); + test_error(errNum, "Unable to create command queue"); + + cl_device_id devList[] = { deviceID, NULL }; + +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_memory_win32")) + { + throw std::runtime_error("Device does not support" + "cl_khr_external_memory_win32 extension \n"); + } +#else + if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_memory_opaque_fd " + "extension \n"); + } +#endif + uint32_t width = 256; + uint32_t height = 16; + cl_image_desc image_desc; + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + cl_image_format img_format = { 0 }; + + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + getSupportedVulkanExternalMemoryHandleTypeList()[0]; + VulkanImage2D* vkImage2D = + new VulkanImage2D(vkDevice, VULKAN_FORMAT_R8G8B8A8_UNORM, width, height, + 1, vkExternalMemoryHandleType); + + const VulkanMemoryTypeList& memoryTypeList = 
vkImage2D->getMemoryTypeList(); + uint64_t totalImageMemSize = vkImage2D->getSize(); + + log_info("Memory type index: %d\n", (uint32_t)memoryTypeList[0]); + log_info("Memory type property: %d\n", + memoryTypeList[0].getMemoryTypeProperty()); + log_info("Image size : %d\n", totalImageMemSize); + + VulkanDeviceMemory* vkDeviceMem = + new VulkanDeviceMemory(vkDevice, totalImageMemSize, memoryTypeList[0], + vkExternalMemoryHandleType); + vkDeviceMem->bindImage(*vkImage2D, 0); + + void* handle = NULL; + int fd; + std::vector extMemProperties{ + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR, + (cl_mem_properties)devList[0], + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR, + }; + switch (vkExternalMemoryHandleType) + { +#ifdef _WIN32 + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: + handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType); + errNum = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties.push_back((cl_mem_properties)handle); + break; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: + handle = vkDeviceMem->getHandle(vkExternalMemoryHandleType); + errNum = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties.push_back( + (cl_mem_properties) + CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties.push_back((cl_mem_properties)handle); + break; +#else + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: + fd = (int)vkDeviceMem->getHandle(vkExternalMemoryHandleType); + errNum = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties.push_back((cl_mem_properties)fd); + break; +#endif + default: + errNum = TEST_FAIL; + log_error("Unsupported external memory handle 
type \n"); + break; + } + if (errNum != CL_SUCCESS) + { + log_error("Checks failed for " + "CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR\n"); + return TEST_FAIL; + } + extMemProperties.push_back(0); + + const VkImageCreateInfo VulkanImageCreateInfo = + vkImage2D->getVkImageCreateInfo(); + + errNum = getCLImageInfoFromVkImageInfo( + &VulkanImageCreateInfo, totalImageMemSize, &img_format, &image_desc); + if (errNum != CL_SUCCESS) + { + log_error("getCLImageInfoFromVkImageInfo failed!!!"); + return TEST_FAIL; + } + + clMemWrapper image; + + // Pass valid properties, image_desc and image_format + image = clCreateImageWithProperties( + context, extMemProperties.data(), CL_MEM_READ_WRITE, &img_format, + &image_desc, NULL /* host_ptr */, &errNum); + test_error(errNum, "Unable to create Image with Properties"); + image.reset(); + + // Passing properties, image_desc and image_format all as NULL + image = clCreateImageWithProperties(context, NULL, CL_MEM_READ_WRITE, NULL, + NULL, NULL, &errNum); + test_failure_error( + errNum, CL_INVALID_IMAGE_DESCRIPTOR, + "Image creation must fail with CL_INVALID_IMAGE_DESCRIPTOR " + "when all are passed as NULL"); + + image.reset(); + + // Passing NULL properties and a valid image_format and image_desc + image = + clCreateImageWithProperties(context, NULL, CL_MEM_READ_WRITE, + &img_format, &image_desc, NULL, &errNum); + test_error(errNum, + "Unable to create image with NULL properties " + "with valid image format and image desc"); + + image.reset(); + + // Passing image_format as NULL + image = clCreateImageWithProperties(context, extMemProperties.data(), + CL_MEM_READ_WRITE, NULL, &image_desc, + NULL, &errNum); + test_failure_error(errNum, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Image creation must fail with " + "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" + "when image desc passed as NULL"); + + image.reset(); + + // Passing image_desc as NULL + image = clCreateImageWithProperties(context, extMemProperties.data(), + CL_MEM_READ_WRITE, 
&img_format, NULL, + NULL, &errNum); + test_failure_error(errNum, CL_INVALID_IMAGE_DESCRIPTOR, + "Image creation must fail with " + "CL_INVALID_IMAGE_DESCRIPTOR " + "when image desc passed as NULL"); + image.reset(); + + return TEST_PASS; +} + +int test_consistency_external_semaphore(cl_device_id deviceID, + cl_context _context, + cl_command_queue _queue, + int num_elements) +{ + cl_int errNum; + VulkanDevice vkDevice; + // Context and command queue creation + cl_platform_id platform = NULL; + cl_context context = NULL; + cl_command_queue cmd_queue = NULL; + + errNum = clGetPlatformIDs(1, &platform, NULL); + test_error(errNum, "Failed to get platform Id"); + + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 }; + + contextProperties[1] = (cl_context_properties)platform; + + context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, + NULL, NULL, &errNum); + test_error(errNum, "Unable to create context with properties"); + + cmd_queue = clCreateCommandQueue(context, deviceID, 0, &errNum); + test_error(errNum, "Unable to create command queue"); + + cl_device_id devList[] = { deviceID, NULL }; + +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_semaphore_win32")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_semaphore_win32 " + "extension \n"); + } +#else + if (!is_extension_available(devList[0], + "cl_khr_external_semaphore_opaque_fd")) + { + throw std::runtime_error( + "Device does not support " + "cl_khr_external_semaphore_opaque_fd extension \n"); + } +#endif + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2Clsemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2Vksemaphore(vkDevice, vkExternalSemaphoreHandleType); + cl_semaphore_khr clCl2Vksemaphore; + cl_semaphore_khr clVk2Clsemaphore; + + void* handle1 = NULL; + void* handle2 = NULL; + int fd1, fd2; + 
std::vector sema_props1{ + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + }; + std::vector sema_props2{ + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + }; + switch (vkExternalSemaphoreHandleType) + { +#ifdef _WIN32 + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: + log_info(" Opaque NT handles are only supported on Windows\n"); + handle1 = vkVk2Clsemaphore.getHandle(vkExternalSemaphoreHandleType); + handle2 = vkCl2Vksemaphore.getHandle(vkExternalSemaphoreHandleType); + errNum = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR); + sema_props1.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR); + sema_props1.push_back((cl_semaphore_properties_khr)handle1); + sema_props2.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR); + sema_props2.push_back((cl_semaphore_properties_khr)handle2); + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: + log_info(" Opaque D3DKMT handles are only supported on Windows\n"); + handle1 = vkVk2Clsemaphore.getHandle(vkExternalSemaphoreHandleType); + handle2 = vkCl2Vksemaphore.getHandle(vkExternalSemaphoreHandleType); + errNum = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR); + sema_props1.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR); + sema_props1.push_back((cl_semaphore_properties_khr)handle1); + sema_props2.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR); + sema_props2.push_back((cl_semaphore_properties_khr)handle2); + break; +#else + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: + log_info(" Opaque file descriptors are not supported on Windows\n"); + fd1 = + (int)vkVk2Clsemaphore.getHandle(vkExternalSemaphoreHandleType); + fd2 = + 
(int)vkCl2Vksemaphore.getHandle(vkExternalSemaphoreHandleType); + errNum = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR); + sema_props1.push_back( + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR); + sema_props1.push_back((cl_semaphore_properties_khr)fd1); + sema_props2.push_back( + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR); + sema_props2.push_back((cl_semaphore_properties_khr)fd2); + break; +#endif + default: log_error("Unsupported external memory handle type\n"); break; + } + if (CL_SUCCESS != errNum) + { + throw std::runtime_error( + "Unsupported external sempahore handle type\n "); + } + sema_props1.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_KHR); + sema_props1.push_back((cl_semaphore_properties_khr)devList[0]); + sema_props1.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_END_KHR); + sema_props2.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_KHR); + sema_props2.push_back((cl_semaphore_properties_khr)devList[0]); + sema_props2.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_END_KHR); + sema_props1.push_back(0); + sema_props2.push_back(0); + + // Pass NULL properties + cl_semaphore_khr cl_ext_semaphore = + clCreateSemaphoreWithPropertiesKHRptr(context, NULL, &errNum); + test_failure_error(errNum, CL_INVALID_VALUE, + "Semaphore creation must fail with CL_INVALID_VALUE " + " when properties are passed as NULL"); + + + // Pass invalid semaphore object to wait + errNum = + clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, NULL, NULL, 0, NULL, NULL); + test_failure_error(errNum, CL_INVALID_VALUE, + "clEnqueueWaitSemaphoresKHR fails with CL_INVALID_VALUE " + "when invalid semaphore object is passed"); + + + // Pass invalid semaphore object to signal + errNum = clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, NULL, NULL, 0, NULL, + NULL); + test_failure_error( + errNum, CL_INVALID_VALUE, + "clEnqueueSignalSemaphoresKHR fails with 
CL_INVALID_VALUE" + "when invalid semaphore object is passed"); + + + // Create two semaphore objects + clVk2Clsemaphore = clCreateSemaphoreWithPropertiesKHRptr( + context, sema_props1.data(), &errNum); + test_error(errNum, + "Unable to create semaphore with valid semaphore properties"); + + clCl2Vksemaphore = clCreateSemaphoreWithPropertiesKHRptr( + context, sema_props2.data(), &errNum); + test_error(errNum, + "Unable to create semaphore with valid semaphore properties"); + + + // Call Signal twice consecutively + errNum = clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, &clVk2Clsemaphore, + NULL, 0, NULL, NULL); + test_error(errNum, "clEnqueueSignalSemaphoresKHRptr failed"); + + errNum = clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, &clCl2Vksemaphore, + NULL, 0, NULL, NULL); + test_error(errNum, + "clEnqueueSignalSemaphoresKHRptr failed for two " + "consecutive wait events"); + + + // Call Wait twice consecutively + errNum = clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, &clVk2Clsemaphore, + NULL, 0, NULL, NULL); + test_error(errNum, "clEnqueueWaitSemaphoresKHRptr failed"); + + errNum = clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, &clCl2Vksemaphore, + NULL, 0, NULL, NULL); + test_error(errNum, + "clEnqueueWaitSemaphoresKHRptr failed for two " + " consecutive wait events"); + + + // Pass invalid object to release call + errNum = clReleaseSemaphoreObjectKHRptr(NULL); + test_failure_error(errNum, CL_INVALID_VALUE, + "clReleaseSemaphoreObjectKHRptr fails with " + "CL_INVALID_VALUE when NULL semaphore object is passed"); + + // Release both semaphore objects + errNum = clReleaseSemaphoreObjectKHRptr(clVk2Clsemaphore); + test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed"); + + errNum = clReleaseSemaphoreObjectKHRptr(clCl2Vksemaphore); + test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed"); + + return TEST_PASS; +} diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp new file mode 100644 
index 00000000..7daf96de --- /dev/null +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -0,0 +1,1808 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include +#include +#include +#include +#include +#include +#include +#include +#include "harness/errorHelpers.h" + +#define MAX_BUFFERS 5 +#define MAX_IMPORTS 5 +#define BUFFERSIZE 3000 +static cl_uchar uuid[CL_UUID_SIZE_KHR]; +static cl_device_id deviceId = NULL; + +namespace { +struct Params +{ + uint32_t numBuffers; + uint32_t bufferSize; + uint32_t interBufferOffset; +}; +} + +static const char *vkBufferShader = + "#version 450\n" + "#extension GL_ARB_separate_shader_objects : enable\n" + "#extension GL_NV_gpu_shader5 : enable\n" + "layout(binding = 0) buffer Params\n" + "{\n" + " uint32_t numBuffers;\n" + " uint32_t bufferSize;\n" + " uint32_t interBufferOffset;\n" + "};\n" + "layout(binding = 1) buffer Buffer\n" + "{\n" + " uint8_t ptr[];\n" + "} bufferPtrList[" STRING( + MAX_BUFFERS) "];\n" + "layout(local_size_x = 512) in;\n" + "void main() {\n" + " for (uint32_t bufIdx = 0; bufIdx < numBuffers;" + " bufIdx++) {\n" + " uint32_t ptrIdx = gl_GlobalInvocationID.x;\n" + " uint32_t limit = bufferSize;\n" + " while (ptrIdx < limit) {\n" + " bufferPtrList[bufIdx].ptr[ptrIdx]++;\n" + " ptrIdx += (gl_NumWorkGroups.x * " + "gl_WorkGroupSize.x);\n" + " }\n" + " }\n" + "}\n"; + +const char *kernel_text_numbuffer_1 = " \ +__kernel void 
clUpdateBuffer(int bufferSize, __global unsigned char *a) { \n\ + int gid = get_global_id(0); \n\ + if (gid < bufferSize) { \n\ + a[gid]++; \n\ + } \n\ +}"; + +const char *kernel_text_numbuffer_2 = " \ +__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b) { \n\ + int gid = get_global_id(0); \n\ + if (gid < bufferSize) { \n\ + a[gid]++; \n\ + b[gid]++;\n\ + } \n\ +}"; + +const char *kernel_text_numbuffer_4 = " \ +__kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a, __global unsigned char *b, __global unsigned char *c, __global unsigned char *d) { \n\ + int gid = get_global_id(0); \n\ + if (gid < bufferSize) { \n\ + a[gid]++;\n\ + b[gid]++; \n\ + c[gid]++; \n\ + d[gid]++; \n\ + } \n\ +}"; + + +const char *kernel_text_verify = " \ +__kernel void checkKernel(__global unsigned char *ptr, int size, int expVal, __global unsigned char *err) \n\ +{ \n\ + int idx = get_global_id(0); \n\ + if ((idx < size) && (*err == 0)) { \n\ + if (ptr[idx] != expVal){ \n\ + *err = 1; \n\ + } \n\ + } \n\ +}"; + +int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, + cl_command_queue &cmd_queue2, cl_kernel *kernel, + cl_kernel &verify_kernel, VulkanDevice &vkDevice, + uint32_t numBuffers, uint32_t bufferSize) +{ + int err = CL_SUCCESS; + size_t global_work_size[1]; + uint8_t *error_2; + cl_mem error_1; + cl_kernel update_buffer_kernel; + cl_kernel kernel_cq; + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + const char *program_source_const = kernel_text_numbuffer_2; + size_t program_source_length = strlen(program_source_const); + cl_program program = clCreateProgramWithSource( + context, 1, &program_source_const, &program_source_length, &err); + err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed to build program \n"); + return err; + } + // create the kernel + kernel_cq 
= clCreateKernel(program, "clUpdateBuffer", &err); + if (err != CL_SUCCESS) + { + print_error(err, "clCreateKernel failed \n"); + return err; + } + + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, + vkBufferShaderModule); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + + const uint32_t maxIter = innerIterations; + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + std::vector vkBufferListDeviceMemory; + std::vector externalMemory; + for 
(size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type: %d\n", + vkExternalMemoryHandleType); + + VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024, + vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyBuffer.getMemoryTypeList(); + + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + const VulkanMemoryType &memoryType = memoryTypeList[mtIdx]; + + log_info("Memory type index: %d\n", (uint32_t)memoryType); + log_info("Memory type property: %d\n", + memoryType.getMemoryTypeProperty()); + + VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize, + vkExternalMemoryHandleType); + + for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) + { + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, bufferSize, memoryType, + vkExternalMemoryHandleType)); + externalMemory.push_back(new clExternalMemory( + vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType, + 0, bufferSize, context, deviceId)); + } + cl_mem buffers[MAX_BUFFERS]; + clFinish(cmd_queue1); + Params *params = (Params *)vkParamsDeviceMemory.map(); + params->numBuffers = numBuffers; + params->bufferSize = bufferSize; + params->interBufferOffset = 0; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) + { + size_t buffer_size = vkBufferList[bIdx].getSize(); + vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], + 0); + buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); + vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]); + } + vkCommandBuffer.begin(); + vkCommandBuffer.bindPipeline(vkComputePipeline); + vkCommandBuffer.bindDescriptorSets( + vkComputePipeline, vkPipelineLayout, vkDescriptorSet); + vkCommandBuffer.dispatch(512, 1, 1); + 
vkCommandBuffer.end(); + + if (vkBufferList.size() == 2) + { + update_buffer_kernel = kernel[0]; + } + else if (vkBufferList.size() == 3) + { + update_buffer_kernel = kernel[1]; + } + else if (vkBufferList.size() == 5) + { + update_buffer_kernel = kernel[2]; + } + // global work size should be less than or equal to + // bufferSizeList[i] + global_work_size[0] = bufferSize; + for (uint32_t iter = 0; iter < maxIter; iter++) + { + + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + clVk2CLExternalSemaphore->wait(cmd_queue1); + + err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), + (void *)&bufferSize); + err |= clSetKernelArg(kernel_cq, 0, sizeof(uint32_t), + (void *)&bufferSize); + err |= clSetKernelArg(kernel_cq, 1, sizeof(cl_mem), + (void *)&(buffers[0])); + + for (int i = 0; i < vkBufferList.size() - 1; i++) + { + err |= + clSetKernelArg(update_buffer_kernel, i + 1, + sizeof(cl_mem), (void *)&(buffers[i])); + } + + err |= + clSetKernelArg(kernel_cq, 2, sizeof(cl_mem), + (void *)&(buffers[vkBufferList.size() - 1])); + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for kernel\n"); + goto CLEANUP; + } + cl_event first_launch; + + err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel, + 1, NULL, global_work_size, NULL, 0, + NULL, &first_launch); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch update_buffer_kernel," + "error\n"); + goto CLEANUP; + } + + err = clEnqueueNDRangeKernel(cmd_queue2, kernel_cq, 1, NULL, + global_work_size, NULL, 1, + &first_launch, NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch update_buffer_kernel," + "error\n"); + goto CLEANUP; + } + + if (iter != (maxIter - 1)) + { + clCl2VkExternalSemaphore->signal(cmd_queue2); + } + } + error_2 = (uint8_t *)malloc(sizeof(uint8_t)); + if (NULL == error_2) + { + 
log_error("Not able to allocate memory\n"); + goto CLEANUP; + } + clFinish(cmd_queue2); + error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(uint8_t), NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clCreateBuffer \n"); + goto CLEANUP; + } + uint8_t val = 0; + err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), &val, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error\n"); + goto CLEANUP; + } + + int calc_max_iter; + for (int i = 0; i < vkBufferList.size(); i++) + { + if (i == 0) + calc_max_iter = (maxIter * 3); + else + calc_max_iter = (maxIter * 2); + err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), + (void *)&(buffers[i])); + err |= + clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize); + err |= clSetKernelArg(verify_kernel, 2, sizeof(int), + &calc_max_iter); + err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem), + (void *)&error_1); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "verify_kernel \n"); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL, + global_work_size, NULL, 0, NULL, + NULL); + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch verify_kernel," + "error \n"); + goto CLEANUP; + } + err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), error_2, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error \n "); + goto CLEANUP; + } + if (*error_2 == 1) + { + log_error("&&&& vulkan_opencl_buffer test FAILED\n"); + goto CLEANUP; + } + } + for (size_t i = 0; i < vkBufferList.size(); i++) + { + delete vkBufferListDeviceMemory[i]; + delete externalMemory[i]; + } + vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(), + vkBufferListDeviceMemory.begin() + + numBuffers); + externalMemory.erase(externalMemory.begin(), + externalMemory.begin() + numBuffers); + } + 
} +CLEANUP: + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + if (vkBufferListDeviceMemory[i]) + { + delete vkBufferListDeviceMemory[i]; + } + if (externalMemory[i]) + { + delete externalMemory[i]; + } + } + if (program) clReleaseProgram(program); + if (kernel_cq) clReleaseKernel(kernel_cq); + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (error_2) free(error_2); + if (error_1) clReleaseMemObject(error_1); + + return err; +} + +int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, + cl_kernel *kernel, cl_kernel &verify_kernel, + VulkanDevice &vkDevice, uint32_t numBuffers, + uint32_t bufferSize) +{ + log_info("RUNNING TEST WITH ONE QUEUE...... \n\n"); + size_t global_work_size[1]; + uint8_t *error_2; + cl_mem error_1; + cl_kernel update_buffer_kernel; + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + int err = CL_SUCCESS; + + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, + vkBufferShaderModule); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + 
vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + const uint32_t maxIter = innerIterations; + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + std::vector vkBufferListDeviceMemory; + std::vector externalMemory; + + for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type: %d\n", + vkExternalMemoryHandleType); + + VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024, + vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyBuffer.getMemoryTypeList(); + + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + const VulkanMemoryType &memoryType = memoryTypeList[mtIdx]; + + log_info("Memory type index: %d\n", (uint32_t)memoryType); + log_info("Memory type property: %d\n", + memoryType.getMemoryTypeProperty()); + + VulkanBufferList vkBufferList(numBuffers, vkDevice, bufferSize, + vkExternalMemoryHandleType); + + for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) + { + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, bufferSize, memoryType, + vkExternalMemoryHandleType)); + externalMemory.push_back(new clExternalMemory( + vkBufferListDeviceMemory[bIdx], vkExternalMemoryHandleType, + 
0, bufferSize, context, deviceId)); + } + cl_mem buffers[4]; + clFinish(cmd_queue1); + Params *params = (Params *)vkParamsDeviceMemory.map(); + params->numBuffers = numBuffers; + params->bufferSize = bufferSize; + params->interBufferOffset = 0; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) + { + size_t buffer_size = vkBufferList[bIdx].getSize(); + vkBufferListDeviceMemory[bIdx]->bindBuffer(vkBufferList[bIdx], + 0); + buffers[bIdx] = externalMemory[bIdx]->getExternalMemoryBuffer(); + vkDescriptorSet.update((uint32_t)bIdx + 1, vkBufferList[bIdx]); + } + vkCommandBuffer.begin(); + vkCommandBuffer.bindPipeline(vkComputePipeline); + vkCommandBuffer.bindDescriptorSets( + vkComputePipeline, vkPipelineLayout, vkDescriptorSet); + vkCommandBuffer.dispatch(512, 1, 1); + vkCommandBuffer.end(); + + if (vkBufferList.size() == 1) + { + update_buffer_kernel = kernel[0]; + } + else if (vkBufferList.size() == 2) + { + update_buffer_kernel = kernel[1]; + } + else if (vkBufferList.size() == 4) + { + update_buffer_kernel = kernel[2]; + } + + // global work size should be less than or equal to + // bufferSizeList[i] + global_work_size[0] = bufferSize; + + for (uint32_t iter = 0; iter < maxIter; iter++) + { + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + clVk2CLExternalSemaphore->wait(cmd_queue1); + + err = clSetKernelArg(update_buffer_kernel, 0, sizeof(uint32_t), + (void *)&bufferSize); + for (int i = 0; i < vkBufferList.size(); i++) + { + err |= + clSetKernelArg(update_buffer_kernel, i + 1, + sizeof(cl_mem), (void *)&(buffers[i])); + } + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for kernel\n"); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel(cmd_queue1, update_buffer_kernel, + 1, NULL, global_work_size, NULL, 0, + NULL, NULL); + if 
(err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch update_buffer_kernel," + " error\n"); + goto CLEANUP; + } + if (iter != (maxIter - 1)) + { + clCl2VkExternalSemaphore->signal(cmd_queue1); + } + } + error_2 = (uint8_t *)malloc(sizeof(uint8_t)); + if (NULL == error_2) + { + log_error("Not able to allocate memory\n"); + goto CLEANUP; + } + + error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(uint8_t), NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clCreateBuffer \n"); + goto CLEANUP; + } + uint8_t val = 0; + err = clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), &val, 0, NULL, NULL); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clEnqueueWriteBuffer \n"); + goto CLEANUP; + } + + int calc_max_iter = (maxIter * 2); + for (int i = 0; i < vkBufferList.size(); i++) + { + err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), + (void *)&(buffers[i])); + err |= + clSetKernelArg(verify_kernel, 1, sizeof(int), &bufferSize); + err |= clSetKernelArg(verify_kernel, 2, sizeof(int), + &calc_max_iter); + err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem), + (void *)&error_1); + if (err != CL_SUCCESS) + { + print_error( + err, + "Error: Failed to set arg values for verify_kernel \n"); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, NULL, + global_work_size, NULL, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error( + err, "Error: Failed to launch verify_kernel, error\n"); + goto CLEANUP; + } + + err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), error_2, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error \n"); + goto CLEANUP; + } + if (*error_2 == 1) + { + log_error("&&&& vulkan_opencl_buffer test FAILED\n"); + goto CLEANUP; + } + } + for (size_t i = 0; i < vkBufferList.size(); i++) + { + delete vkBufferListDeviceMemory[i]; + delete externalMemory[i]; + } + 
vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(), + vkBufferListDeviceMemory.begin() + + numBuffers); + externalMemory.erase(externalMemory.begin(), + externalMemory.begin() + numBuffers); + } + } +CLEANUP: + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + if (vkBufferListDeviceMemory[i]) + { + delete vkBufferListDeviceMemory[i]; + } + if (externalMemory[i]) + { + delete externalMemory[i]; + } + } + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (error_2) free(error_2); + if (error_1) clReleaseMemObject(error_1); + return err; +} + +int run_test_with_multi_import_same_ctx( + cl_context &context, cl_command_queue &cmd_queue1, cl_kernel *kernel, + cl_kernel &verify_kernel, VulkanDevice &vkDevice, uint32_t numBuffers, + uint32_t bufferSize, uint32_t bufferSizeForOffset) +{ + size_t global_work_size[1]; + uint8_t *error_2; + cl_mem error_1; + int numImports = numBuffers; + cl_kernel update_buffer_kernel[MAX_IMPORTS]; + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + int err = CL_SUCCESS; + int calc_max_iter; + bool withOffset; + uint32_t pBufferSize; + + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + 
VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, + vkBufferShaderModule); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + const uint32_t maxIter = innerIterations; + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + std::vector vkBufferListDeviceMemory; + std::vector> externalMemory; + + + for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type: %d\n", + vkExternalMemoryHandleType); + + VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024, + vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyBuffer.getMemoryTypeList(); + + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + const VulkanMemoryType &memoryType = memoryTypeList[mtIdx]; + + log_info("Memory type index: %d\n", (uint32_t)memoryType); + log_info("Memory type property: %d\n", + memoryType.getMemoryTypeProperty()); + for (unsigned int withOffset = 0; + withOffset <= (unsigned int)enableOffset; withOffset++) + { + log_info("Running withOffset case %d\n", (uint32_t)withOffset); + if 
(withOffset) + { + pBufferSize = bufferSizeForOffset; + } + else + { + pBufferSize = bufferSize; + } + cl_mem buffers[MAX_BUFFERS][MAX_IMPORTS]; + VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize, + vkExternalMemoryHandleType); + uint32_t interBufferOffset = + (uint32_t)(vkBufferList[0].getSize()); + + for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) + { + if (withOffset == 0) + { + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, pBufferSize, + memoryType, + vkExternalMemoryHandleType)); + } + if (withOffset == 1) + { + uint32_t totalSize = + (uint32_t)(vkBufferList.size() * interBufferOffset); + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, totalSize, + memoryType, + vkExternalMemoryHandleType)); + } + std::vector pExternalMemory; + for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + { + pExternalMemory.push_back(new clExternalMemory( + vkBufferListDeviceMemory[bIdx], + vkExternalMemoryHandleType, + withOffset * bIdx * interBufferOffset, pBufferSize, + context, deviceId)); + } + externalMemory.push_back(pExternalMemory); + } + + clFinish(cmd_queue1); + Params *params = (Params *)vkParamsDeviceMemory.map(); + params->numBuffers = numBuffers; + params->bufferSize = pBufferSize; + params->interBufferOffset = interBufferOffset * withOffset; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) + { + size_t buffer_size = vkBufferList[bIdx].getSize(); + vkBufferListDeviceMemory[bIdx]->bindBuffer( + vkBufferList[bIdx], + bIdx * interBufferOffset * withOffset); + for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + { + buffers[bIdx][cl_bIdx] = + externalMemory[bIdx][cl_bIdx] + ->getExternalMemoryBuffer(); + } + vkDescriptorSet.update((uint32_t)bIdx + 1, + vkBufferList[bIdx]); + } + vkCommandBuffer.begin(); + vkCommandBuffer.bindPipeline(vkComputePipeline); + vkCommandBuffer.bindDescriptorSets( + vkComputePipeline, 
vkPipelineLayout, vkDescriptorSet); + vkCommandBuffer.dispatch(512, 1, 1); + vkCommandBuffer.end(); + for (int i = 0; i < numImports; i++) + { + update_buffer_kernel[i] = (numBuffers == 1) + ? kernel[0] + : ((numBuffers == 2) ? kernel[1] : kernel[2]); + } + // global work size should be less than or equal to + // bufferSizeList[i] + global_work_size[0] = pBufferSize; + + for (uint32_t iter = 0; iter < maxIter; iter++) + { + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + clVk2CLExternalSemaphore->wait(cmd_queue1); + for (uint8_t launchIter = 0; launchIter < numImports; + launchIter++) + { + err = clSetKernelArg(update_buffer_kernel[launchIter], + 0, sizeof(uint32_t), + (void *)&pBufferSize); + for (int i = 0; i < numBuffers; i++) + { + err |= clSetKernelArg( + update_buffer_kernel[launchIter], i + 1, + sizeof(cl_mem), + (void *)&(buffers[i][launchIter])); + } + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "kernel\n "); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel( + cmd_queue1, update_buffer_kernel[launchIter], 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch " + "update_buffer_kernel, error\n "); + goto CLEANUP; + } + } + if (iter != (maxIter - 1)) + { + clCl2VkExternalSemaphore->signal(cmd_queue1); + } + } + error_2 = (uint8_t *)malloc(sizeof(uint8_t)); + if (NULL == error_2) + { + log_error("Not able to allocate memory\n"); + goto CLEANUP; + } + + error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(uint8_t), NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clCreateBuffer \n"); + goto CLEANUP; + } + uint8_t val = 0; + err = + clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), &val, 0, NULL, NULL); + if (CL_SUCCESS != err) + { + print_error(err, "Error: 
clEnqueueWriteBuffer \n"); + goto CLEANUP; + } + calc_max_iter = maxIter * (numBuffers + 1); + + for (int i = 0; i < vkBufferList.size(); i++) + { + err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), + (void *)&(buffers[i][0])); + err |= clSetKernelArg(verify_kernel, 1, sizeof(int), + &pBufferSize); + err |= clSetKernelArg(verify_kernel, 2, sizeof(int), + &calc_max_iter); + err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem), + (void *)&error_1); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "verify_kernel \n"); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, + NULL, global_work_size, NULL, + 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error( + err, + "Error: Failed to launch verify_kernel, error\n"); + goto CLEANUP; + } + + err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), error_2, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error \n"); + goto CLEANUP; + } + if (*error_2 == 1) + { + log_error("&&&& vulkan_opencl_buffer test FAILED\n"); + goto CLEANUP; + } + } + for (size_t i = 0; i < vkBufferList.size(); i++) + { + for (size_t j = 0; j < numImports; j++) + { + delete externalMemory[i][j]; + } + } + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + delete vkBufferListDeviceMemory[i]; + } + vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(), + vkBufferListDeviceMemory.end()); + for (size_t i = 0; i < externalMemory.size(); i++) + { + externalMemory[i].erase(externalMemory[i].begin(), + externalMemory[i].begin() + + numBuffers); + } + externalMemory.clear(); + } + } + } +CLEANUP: + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + if (vkBufferListDeviceMemory[i]) + { + delete vkBufferListDeviceMemory[i]; + } + } + for (size_t i = 0; i < externalMemory.size(); i++) + { + for (size_t j = 0; j < externalMemory[i].size(); j++) + { + if 
(externalMemory[i][j]) + { + delete externalMemory[i][j]; + } + } + } + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (error_2) free(error_2); + if (error_1) clReleaseMemObject(error_1); + return err; +} + +int run_test_with_multi_import_diff_ctx( + cl_context &context, cl_context &context2, cl_command_queue &cmd_queue1, + cl_command_queue &cmd_queue2, cl_kernel *kernel1, cl_kernel *kernel2, + cl_kernel &verify_kernel, cl_kernel verify_kernel2, VulkanDevice &vkDevice, + uint32_t numBuffers, uint32_t bufferSize, uint32_t bufferSizeForOffset) +{ + size_t global_work_size[1]; + uint8_t *error_3; + cl_mem error_1; + cl_mem error_2; + int numImports = numBuffers; + cl_kernel update_buffer_kernel1[MAX_IMPORTS]; + cl_kernel update_buffer_kernel2[MAX_IMPORTS]; + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + clExternalSemaphore *clVk2CLExternalSemaphore2 = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore2 = NULL; + int err = CL_SUCCESS; + int calc_max_iter; + bool withOffset; + uint32_t pBufferSize; + + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + VulkanComputePipeline 
vkComputePipeline(vkDevice, vkPipelineLayout, + vkBufferShaderModule); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + + clVk2CLExternalSemaphore2 = new clExternalSemaphore( + vkVk2CLSemaphore, context2, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore2 = new clExternalSemaphore( + vkCl2VkSemaphore, context2, vkExternalSemaphoreHandleType, deviceId); + + const uint32_t maxIter = innerIterations; + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCommandBuffer(vkDevice, vkCommandPool); + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + std::vector vkBufferListDeviceMemory; + std::vector> externalMemory1; + std::vector> externalMemory2; + + for (size_t emhtIdx = 0; emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type:%d\n", + vkExternalMemoryHandleType); + + VulkanBuffer vkDummyBuffer(vkDevice, 4 * 1024, + vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyBuffer.getMemoryTypeList(); + + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + const VulkanMemoryType &memoryType = memoryTypeList[mtIdx]; + + log_info("Memory type index: %d\n", (uint32_t)memoryType); + log_info("Memory type property: %d\n", + 
memoryType.getMemoryTypeProperty()); + + for (unsigned int withOffset = 0; + withOffset <= (unsigned int)enableOffset; withOffset++) + { + log_info("Running withOffset case %d\n", (uint32_t)withOffset); + cl_mem buffers1[MAX_BUFFERS][MAX_IMPORTS]; + cl_mem buffers2[MAX_BUFFERS][MAX_IMPORTS]; + if (withOffset) + { + pBufferSize = bufferSizeForOffset; + } + else + { + pBufferSize = bufferSize; + } + VulkanBufferList vkBufferList(numBuffers, vkDevice, pBufferSize, + vkExternalMemoryHandleType); + uint32_t interBufferOffset = + (uint32_t)(vkBufferList[0].getSize()); + + for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) + { + if (withOffset == 0) + { + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, pBufferSize, + memoryType, + vkExternalMemoryHandleType)); + } + if (withOffset == 1) + { + uint32_t totalSize = + (uint32_t)(vkBufferList.size() * interBufferOffset); + vkBufferListDeviceMemory.push_back( + new VulkanDeviceMemory(vkDevice, totalSize, + memoryType, + vkExternalMemoryHandleType)); + } + std::vector pExternalMemory1; + std::vector pExternalMemory2; + for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + { + pExternalMemory1.push_back(new clExternalMemory( + vkBufferListDeviceMemory[bIdx], + vkExternalMemoryHandleType, + withOffset * bIdx * interBufferOffset, pBufferSize, + context, deviceId)); + pExternalMemory2.push_back(new clExternalMemory( + vkBufferListDeviceMemory[bIdx], + vkExternalMemoryHandleType, + withOffset * bIdx * interBufferOffset, pBufferSize, + context2, deviceId)); + } + externalMemory1.push_back(pExternalMemory1); + externalMemory2.push_back(pExternalMemory2); + } + + clFinish(cmd_queue1); + Params *params = (Params *)vkParamsDeviceMemory.map(); + params->numBuffers = numBuffers; + params->bufferSize = pBufferSize; + params->interBufferOffset = interBufferOffset * withOffset; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t bIdx = 0; bIdx < vkBufferList.size(); bIdx++) + { 
+ size_t buffer_size = vkBufferList[bIdx].getSize(); + vkBufferListDeviceMemory[bIdx]->bindBuffer( + vkBufferList[bIdx], + bIdx * interBufferOffset * withOffset); + for (size_t cl_bIdx = 0; cl_bIdx < numImports; cl_bIdx++) + { + buffers1[bIdx][cl_bIdx] = + externalMemory1[bIdx][cl_bIdx] + ->getExternalMemoryBuffer(); + buffers2[bIdx][cl_bIdx] = + externalMemory2[bIdx][cl_bIdx] + ->getExternalMemoryBuffer(); + } + vkDescriptorSet.update((uint32_t)bIdx + 1, + vkBufferList[bIdx]); + } + + vkCommandBuffer.begin(); + vkCommandBuffer.bindPipeline(vkComputePipeline); + vkCommandBuffer.bindDescriptorSets( + vkComputePipeline, vkPipelineLayout, vkDescriptorSet); + vkCommandBuffer.dispatch(512, 1, 1); + vkCommandBuffer.end(); + + for (int i = 0; i < numImports; i++) + { + update_buffer_kernel1[i] = (numBuffers == 1) + ? kernel1[0] + : ((numBuffers == 2) ? kernel1[1] : kernel1[2]); + update_buffer_kernel2[i] = (numBuffers == 1) + ? kernel2[0] + : ((numBuffers == 2) ? kernel2[1] : kernel2[2]); + } + + // global work size should be less than or equal + // to bufferSizeList[i] + global_work_size[0] = pBufferSize; + + for (uint32_t iter = 0; iter < maxIter; iter++) + { + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + clVk2CLExternalSemaphore->wait(cmd_queue1); + + for (uint8_t launchIter = 0; launchIter < numImports; + launchIter++) + { + err = clSetKernelArg(update_buffer_kernel1[launchIter], + 0, sizeof(uint32_t), + (void *)&pBufferSize); + for (int i = 0; i < numBuffers; i++) + { + err |= clSetKernelArg( + update_buffer_kernel1[launchIter], i + 1, + sizeof(cl_mem), + (void *)&(buffers1[i][launchIter])); + } + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "kernel\n "); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel( + cmd_queue1, update_buffer_kernel1[launchIter], 1, + NULL, global_work_size, NULL, 0, NULL, 
NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch " + "update_buffer_kernel, error\n"); + goto CLEANUP; + } + } + if (iter != (maxIter - 1)) + { + clCl2VkExternalSemaphore->signal(cmd_queue1); + } + } + clFinish(cmd_queue1); + for (uint32_t iter = 0; iter < maxIter; iter++) + { + if (iter == 0) + { + vkQueue.submit(vkCommandBuffer, vkVk2CLSemaphore); + } + else + { + vkQueue.submit(vkCl2VkSemaphore, vkCommandBuffer, + vkVk2CLSemaphore); + } + clVk2CLExternalSemaphore2->wait(cmd_queue2); + + for (uint8_t launchIter = 0; launchIter < numImports; + launchIter++) + { + err = clSetKernelArg(update_buffer_kernel2[launchIter], + 0, sizeof(uint32_t), + (void *)&bufferSize); + for (int i = 0; i < numBuffers; i++) + { + err |= clSetKernelArg( + update_buffer_kernel2[launchIter], i + 1, + sizeof(cl_mem), + (void *)&(buffers2[i][launchIter])); + } + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "kernel\n "); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel( + cmd_queue2, update_buffer_kernel2[launchIter], 1, + NULL, global_work_size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch " + "update_buffer_kernel, error\n "); + goto CLEANUP; + } + } + if (iter != (maxIter - 1)) + { + clCl2VkExternalSemaphore2->signal(cmd_queue2); + } + } + clFinish(cmd_queue2); + error_3 = (uint8_t *)malloc(sizeof(uint8_t)); + if (NULL == error_3) + { + log_error("Not able to allocate memory\n"); + goto CLEANUP; + } + + error_1 = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(uint8_t), NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clCreateBuffer \n"); + goto CLEANUP; + } + error_2 = clCreateBuffer(context2, CL_MEM_WRITE_ONLY, + sizeof(uint8_t), NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "Error: clCreateBuffer \n"); + goto CLEANUP; + } + uint8_t val = 0; + err = + clEnqueueWriteBuffer(cmd_queue1, error_1, CL_TRUE, 0, + 
sizeof(uint8_t), &val, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error \n"); + goto CLEANUP; + } + + err = + clEnqueueWriteBuffer(cmd_queue2, error_2, CL_TRUE, 0, + sizeof(uint8_t), &val, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error \n"); + goto CLEANUP; + } + + calc_max_iter = maxIter * 2 * (numBuffers + 1); + for (int i = 0; i < numBuffers; i++) + { + err = clSetKernelArg(verify_kernel, 0, sizeof(cl_mem), + (void *)&(buffers1[i][0])); + err |= clSetKernelArg(verify_kernel, 1, sizeof(int), + &pBufferSize); + err |= clSetKernelArg(verify_kernel, 2, sizeof(int), + &calc_max_iter); + err |= clSetKernelArg(verify_kernel, 3, sizeof(cl_mem), + (void *)&error_1); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "verify_kernel \n"); + goto CLEANUP; + } + err = clEnqueueNDRangeKernel(cmd_queue1, verify_kernel, 1, + NULL, global_work_size, NULL, + 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch verify_kernel," + "error\n"); + goto CLEANUP; + } + + err = clEnqueueReadBuffer(cmd_queue1, error_1, CL_TRUE, 0, + sizeof(uint8_t), error_3, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error\n"); + goto CLEANUP; + } + if (*error_3 == 1) + { + log_error("&&&& vulkan_opencl_buffer test FAILED\n"); + goto CLEANUP; + } + } + *error_3 = 0; + for (int i = 0; i < vkBufferList.size(); i++) + { + err = clSetKernelArg(verify_kernel2, 0, sizeof(cl_mem), + (void *)&(buffers2[i][0])); + err |= clSetKernelArg(verify_kernel2, 1, sizeof(int), + &pBufferSize); + err |= clSetKernelArg(verify_kernel2, 2, sizeof(int), + &calc_max_iter); + err |= clSetKernelArg(verify_kernel2, 3, sizeof(cl_mem), + (void *)&error_2); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg values for " + "verify_kernel \n"); + goto CLEANUP; + } + err = 
clEnqueueNDRangeKernel(cmd_queue2, verify_kernel2, 1, + NULL, global_work_size, NULL, + 0, NULL, NULL); + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to launch verify_kernel," + "error\n"); + goto CLEANUP; + } + + err = clEnqueueReadBuffer(cmd_queue2, error_2, CL_TRUE, 0, + sizeof(uint8_t), error_3, 0, NULL, + NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed read output, error\n"); + goto CLEANUP; + } + if (*error_3 == 1) + { + log_error("&&&& vulkan_opencl_buffer test FAILED\n"); + goto CLEANUP; + } + } + for (size_t i = 0; i < vkBufferList.size(); i++) + { + for (size_t j = 0; j < numImports; j++) + { + delete externalMemory1[i][j]; + delete externalMemory2[i][j]; + } + } + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + delete vkBufferListDeviceMemory[i]; + } + vkBufferListDeviceMemory.erase(vkBufferListDeviceMemory.begin(), + vkBufferListDeviceMemory.end()); + for (size_t i = 0; i < externalMemory1.size(); i++) + { + externalMemory1[i].erase(externalMemory1[i].begin(), + externalMemory1[i].begin() + + numBuffers); + externalMemory2[i].erase(externalMemory2[i].begin(), + externalMemory2[i].begin() + + numBuffers); + } + externalMemory1.clear(); + externalMemory2.clear(); + } + } + } +CLEANUP: + for (size_t i = 0; i < vkBufferListDeviceMemory.size(); i++) + { + if (vkBufferListDeviceMemory[i]) + { + delete vkBufferListDeviceMemory[i]; + } + } + for (size_t i = 0; i < externalMemory1.size(); i++) + { + for (size_t j = 0; j < externalMemory1[i].size(); j++) + { + if (externalMemory1[i][j]) + { + delete externalMemory1[i][j]; + } + } + } + for (size_t i = 0; i < externalMemory2.size(); i++) + { + for (size_t j = 0; j < externalMemory2[i].size(); j++) + { + if (externalMemory2[i][j]) + { + delete externalMemory2[i][j]; + } + } + } + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + if (clVk2CLExternalSemaphore2) delete 
clVk2CLExternalSemaphore2; + if (clCl2VkExternalSemaphore2) delete clCl2VkExternalSemaphore2; + if (error_3) free(error_3); + if (error_1) clReleaseMemObject(error_1); + if (error_2) clReleaseMemObject(error_2); + return err; +} + +int test_buffer_common(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + + int current_device = 0; + int device_count = 0; + int devices_prohibited = 0; + cl_int errNum = CL_SUCCESS; + cl_platform_id platform = NULL; + size_t extensionSize = 0; + cl_uint num_devices = 0; + cl_uint device_no = 0; + const size_t bufsize = BUFFERSIZE; + char buf[BUFFERSIZE]; + cl_device_id *devices; + char *extensions = NULL; + cl_kernel verify_kernel; + cl_kernel verify_kernel2; + cl_kernel kernel[3] = { NULL, NULL, NULL }; + cl_kernel kernel2[3] = { NULL, NULL, NULL }; + const char *program_source_const[3] = { kernel_text_numbuffer_1, + kernel_text_numbuffer_2, + kernel_text_numbuffer_4 }; + const char *program_source_const_verify; + size_t program_source_length; + cl_command_queue cmd_queue1 = NULL; + cl_command_queue cmd_queue2 = NULL; + cl_command_queue cmd_queue3 = NULL; + cl_context context = NULL; + cl_program program[3] = { NULL, NULL, NULL }; + cl_program program_verify, program_verify2; + cl_context context2 = NULL; + + + VulkanDevice vkDevice; + uint32_t numBuffersList[] = { 1, 2, 4 }; + uint32_t bufferSizeList[] = { 4 * 1024, 64 * 1024, 2 * 1024 * 1024 }; + uint32_t bufferSizeListforOffset[] = { 256, 512, 1024 }; + + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 }; + errNum = clGetPlatformIDs(1, &platform, NULL); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "Error: Failed to get platform\n"); + goto CLEANUP; + } + + errNum = + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "clGetDeviceIDs failed in returning of devices\n"); + goto CLEANUP; + } + devices = (cl_device_id *)malloc(num_devices * 
sizeof(cl_device_id)); + if (NULL == devices) + { + errNum = CL_OUT_OF_HOST_MEMORY; + print_error(errNum, "Unable to allocate memory for devices\n"); + goto CLEANUP; + } + errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, + NULL); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "Failed to get deviceID.\n"); + goto CLEANUP; + } + contextProperties[1] = (cl_context_properties)platform; + log_info("Assigned contextproperties for platform\n"); + for (device_no = 0; device_no < num_devices; device_no++) + { + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, + NULL, &extensionSize); + if (CL_SUCCESS != errNum) + { + print_error(errNum, + "Error in clGetDeviceInfo for getting device_extension " + "size....\n"); + goto CLEANUP; + } + extensions = (char *)malloc(extensionSize); + if (NULL == extensions) + { + print_error(errNum, "Unable to allocate memory for extensions\n"); + errNum = CL_OUT_OF_HOST_MEMORY; + goto CLEANUP; + } + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, + extensionSize, extensions, NULL); + if (CL_SUCCESS != errNum) + { + print_error(errNum, + "Error in clGetDeviceInfo for device_extension\n"); + goto CLEANUP; + } + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR, + CL_UUID_SIZE_KHR, uuid, &extensionSize); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "clGetDeviceInfo failed\n"); + goto CLEANUP; + } + errNum = + memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE); + if (errNum == 0) + { + break; + } + } + if (device_no >= num_devices) + { + errNum = EXIT_FAILURE; + print_error(errNum, + "OpenCL error: " + "No Vulkan-OpenCL Interop capable GPU found.\n"); + goto CLEANUP; + } + deviceId = devices[device_no]; + context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, + NULL, NULL, &errNum); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "error creating context\n"); + goto CLEANUP; + } + log_info("Successfully created 
context !!!\n"); + + cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &errNum); + if (CL_SUCCESS != errNum) + { + errNum = CL_INVALID_COMMAND_QUEUE; + print_error(errNum, "Error: Failed to create command queue!\n"); + goto CLEANUP; + } + cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &errNum); + if (CL_SUCCESS != errNum) + { + errNum = CL_INVALID_COMMAND_QUEUE; + print_error(errNum, "Error: Failed to create command queue!\n"); + goto CLEANUP; + } + log_info("clCreateCommandQueue successful\n"); + for (int i = 0; i < 3; i++) + { + program_source_length = strlen(program_source_const[i]); + program[i] = + clCreateProgramWithSource(context, 1, &program_source_const[i], + &program_source_length, &errNum); + errNum = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "Error: Failed to build program \n"); + return errNum; + } + // create the kernel + kernel[i] = clCreateKernel(program[i], "clUpdateBuffer", &errNum); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "clCreateKernel failed \n"); + return errNum; + } + } + + program_source_const_verify = kernel_text_verify; + program_source_length = strlen(program_source_const_verify); + program_verify = + clCreateProgramWithSource(context, 1, &program_source_const_verify, + &program_source_length, &errNum); + errNum = clBuildProgram(program_verify, 0, NULL, NULL, NULL, NULL); + if (errNum != CL_SUCCESS) + { + log_error("Error: Failed to build program2\n"); + return errNum; + } + verify_kernel = clCreateKernel(program_verify, "checkKernel", &errNum); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "clCreateKernel failed \n"); + return errNum; + } + + if (multiCtx) // different context guard + { + context2 = clCreateContextFromType( + contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errNum); + if (CL_SUCCESS != errNum) + { + print_error(errNum, "error creating context\n"); + goto CLEANUP; + } + cmd_queue3 = + 
clCreateCommandQueue(context2, devices[device_no], 0, &errNum); + if (CL_SUCCESS != errNum) + { + errNum = CL_INVALID_COMMAND_QUEUE; + print_error(errNum, "Error: Failed to create command queue!\n"); + goto CLEANUP; + } + for (int i = 0; i < 3; i++) + { + program_source_length = strlen(program_source_const[i]); + program[i] = + clCreateProgramWithSource(context2, 1, &program_source_const[i], + &program_source_length, &errNum); + errNum = clBuildProgram(program[i], 0, NULL, NULL, NULL, NULL); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "Error: Failed to build program \n"); + return errNum; + } + // create the kernel + kernel2[i] = clCreateKernel(program[i], "clUpdateBuffer", &errNum); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "clCreateKernel failed \n"); + return errNum; + } + } + program_source_length = strlen(program_source_const_verify); + program_verify = + clCreateProgramWithSource(context2, 1, &program_source_const_verify, + &program_source_length, &errNum); + errNum = clBuildProgram(program_verify, 0, NULL, NULL, NULL, NULL); + if (errNum != CL_SUCCESS) + { + log_error("Error: Failed to build program2\n"); + return errNum; + } + verify_kernel2 = clCreateKernel(program_verify, "checkKernel", &errNum); + if (errNum != CL_SUCCESS) + { + print_error(errNum, "clCreateKernel failed \n"); + return errNum; + } + } + + for (size_t numBuffersIdx = 0; numBuffersIdx < ARRAY_SIZE(numBuffersList); + numBuffersIdx++) + { + uint32_t numBuffers = numBuffersList[numBuffersIdx]; + log_info("Number of buffers: %d\n", numBuffers); + for (size_t sizeIdx = 0; sizeIdx < ARRAY_SIZE(bufferSizeList); + sizeIdx++) + { + uint32_t bufferSize = bufferSizeList[sizeIdx]; + uint32_t bufferSizeForOffset = bufferSizeListforOffset[sizeIdx]; + log_info("&&&& RUNNING vulkan_opencl_buffer test for Buffer size: " + "%d\n", + bufferSize); + if (multiImport && !multiCtx) + { + errNum = run_test_with_multi_import_same_ctx( + context, cmd_queue1, kernel, verify_kernel, vkDevice, + 
numBuffers, bufferSize, bufferSizeForOffset); + } + else if (multiImport && multiCtx) + { + errNum = run_test_with_multi_import_diff_ctx( + context, context2, cmd_queue1, cmd_queue3, kernel, kernel2, + verify_kernel, verify_kernel2, vkDevice, numBuffers, + bufferSize, bufferSizeForOffset); + } + else if (numCQ == 2) + { + errNum = run_test_with_two_queue( + context, cmd_queue1, cmd_queue2, kernel, verify_kernel, + vkDevice, numBuffers + 1, bufferSize); + } + else + { + errNum = run_test_with_one_queue(context, cmd_queue1, kernel, + verify_kernel, vkDevice, + numBuffers, bufferSize); + } + if (errNum != CL_SUCCESS) + { + print_error(errNum, "func_name failed \n"); + goto CLEANUP; + } + } + } + +CLEANUP: + for (int i = 0; i < 3; i++) + { + if (program[i]) clReleaseProgram(program[i]); + if (kernel[i]) clReleaseKernel(kernel[i]); + } + if (cmd_queue1) clReleaseCommandQueue(cmd_queue1); + if (cmd_queue2) clReleaseCommandQueue(cmd_queue2); + if (cmd_queue3) clReleaseCommandQueue(cmd_queue3); + if (context) clReleaseContext(context); + if (context2) clReleaseContext(context2); + + if (devices) free(devices); + if (extensions) free(extensions); + + return errNum; +} diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp new file mode 100644 index 00000000..f1d0af1f --- /dev/null +++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp @@ -0,0 +1,1648 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +#define NOMINMAX +#include +#include +#include "harness/errorHelpers.h" + +#define MAX_2D_IMAGES 5 +#define MAX_2D_IMAGE_WIDTH 1024 +#define MAX_2D_IMAGE_HEIGHT 1024 +#define MAX_2D_IMAGE_ELEMENT_SIZE 16 +#define MAX_2D_IMAGE_MIP_LEVELS 11 +#define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES *MAX_2D_IMAGE_MIP_LEVELS +#define GLSL_FORMAT_STRING "" +#define GLSL_TYPE_PREFIX_STRING "" +#define NUM_THREADS_PER_GROUP_X 32 +#define NUM_THREADS_PER_GROUP_Y 32 +#define NUM_BLOCKS(size, blockSize) \ + (ROUND_UP((size), (blockSize)) / (blockSize)) + +#define ASSERT(x) \ + if (!(x)) \ + { \ + fprintf(stderr, "Assertion \"%s\" failed at %s:%d\n", #x, __FILE__, \ + __LINE__); \ + exit(1); \ + } + +#define ASSERT_LEQ(x, y) \ + if (x > y) \ + { \ + ASSERT(0); \ + } + +namespace { +struct Params +{ + uint32_t numImage2DDescriptors; +}; +} +static cl_uchar uuid[CL_UUID_SIZE_KHR]; +static cl_device_id deviceId = NULL; + +static const char *vkImage2DShader = + "#version 450\n" + "#extension GL_ARB_separate_shader_objects : enable\n" + "#extension GL_NV_gpu_shader5 : enable\n" + "layout(binding = 0) buffer Params\n" + "{\n" + " uint32_t numImage2DDescriptors;\n" + "};\n" + "layout(binding = 1, " GLSL_FORMAT_STRING + ") uniform " GLSL_TYPE_PREFIX_STRING "image2D image2DList[" STRING( + MAX_2D_IMAGE_DESCRIPTORS) "];\n" + "layout(local_size_x = 32, local_size_y = " + "32) in;\n" + "void main() {\n" + " uvec3 numThreads = gl_NumWorkGroups * " + "gl_WorkGroupSize;\n" + " for (uint32_t image2DIdx = 0; " + "image2DIdx < numImage2DDescriptors; " + "image2DIdx++)" + " {\n" + " ivec2 imageDim = " + "imageSize(image2DList[image2DIdx]);\n" + " uint32_t heightBy2 = imageDim.y / " + "2;\n" + " for (uint32_t row = " + "gl_GlobalInvocationID.y; row < heightBy2; " + "row += numThreads.y)" + " {\n" + " for (uint32_t col = " + "gl_GlobalInvocationID.x; col < imageDim.x; " + "col += numThreads.x)" 
+ " {\n" + " ivec2 coordsA = ivec2(col, " + "row);\n" + " ivec2 coordsB = ivec2(col, " + "imageDim.y - row - 1);\n" + " " GLSL_TYPE_PREFIX_STRING + "vec4 dataA = " + "imageLoad(image2DList[image2DIdx], " + "coordsA);\n" + " " GLSL_TYPE_PREFIX_STRING + "vec4 dataB = " + "imageLoad(image2DList[image2DIdx], " + "coordsB);\n" + " " + "imageStore(image2DList[image2DIdx], " + "coordsA, dataB);\n" + " " + "imageStore(image2DList[image2DIdx], " + "coordsB, dataA);\n" + " }\n" + " }\n" + " }\n" + "}\n"; + +const char *kernel_text_numImage_1 = " \ +__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\ +__kernel void image2DKernel(read_only image2d_t InputImage, write_only image2d_t OutImage, int num2DImages, int baseWidth, int baseHeight, int numMipLevels)\n\ +{\n\ + int threadIdxX = get_global_id(0);\n\ + int threadIdxY = get_global_id(1);\n\ + int numThreadsX = get_global_size(0); \n\ + int numThreadsY = get_global_size(1);\n\ + if (threadIdxX >= baseWidth || threadIdxY >= baseHeight)\n\ + {\n\ + return;\n\ + }\n\ + %s dataA = read_image%s(InputImage, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataB = read_image%s(InputImage, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + write_image%s(OutImage, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\ + write_image%s(OutImage, (int2)( threadIdxX, threadIdxY), dataB);\n\ +\n\ +}"; + +const char *kernel_text_numImage_2 = " \ +__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\ +__kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2,write_only image2d_t OutImage_2,int num2DImages, int baseWidth, int baseHeight, int numMipLevels) \n\ +{\n\ + int threadIdxX = get_global_id(0);\n\ + int threadIdxY = get_global_id(1);\n\ + int numThreadsX = get_global_size(0);\n\ + int numThreadsY = get_global_size(1);\n\ + if (threadIdxX >= baseWidth || threadIdxY >= 
baseHeight) \n\ + {\n\ + return;\n\ + }\n\ + %s dataA = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataB = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + %s dataC = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataD = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\ + write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\ + write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\ + write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\ +\n\ +}"; + +const char *kernel_text_numImage_4 = " \ +__constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\ +__kernel void image2DKernel(read_only image2d_t InputImage_1, write_only image2d_t OutImage_1, read_only image2d_t InputImage_2, write_only image2d_t OutImage_2, read_only image2d_t InputImage_3, write_only image2d_t OutImage_3, read_only image2d_t InputImage_4, write_only image2d_t OutImage_4, int num2DImages, int baseWidth, int baseHeight, int numMipLevels) \n\ +{\n\ + int threadIdxX = get_global_id(0);\n\ + int threadIdxY = get_global_id(1);\n\ + int numThreadsX = get_global_size(0);\n\ + int numThreadsY = get_global_size(1);\n\ + if (threadIdxX >= baseWidth || threadIdxY >= baseHeight) \n\ + {\n\ + return;\n\ + }\n\ + %s dataA = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataB = read_image%s(InputImage_1, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + %s dataC = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataD = read_image%s(InputImage_2, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + %s dataE = read_image%s(InputImage_3, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataF = read_image%s(InputImage_3, smpImg, 
(int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + %s dataG = read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, threadIdxY)); \n\ + %s dataH = read_image%s(InputImage_4, smpImg, (int2)(threadIdxX, baseHeight-threadIdxY-1)); \n\ + write_image%s(OutImage_1, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataA);\n\ + write_image%s(OutImage_1, (int2)(threadIdxX, threadIdxY), dataB);\n\ + write_image%s(OutImage_2, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataC);\n\ + write_image%s(OutImage_2, (int2)(threadIdxX, threadIdxY), dataD);\n\ + write_image%s(OutImage_3, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataE);\n\ + write_image%s(OutImage_3, (int2)(threadIdxX, threadIdxY), dataF);\n\ + write_image%s(OutImage_4, (int2)(threadIdxX, baseHeight-threadIdxY-1), dataG);\n\ + write_image%s(OutImage_4, (int2)(threadIdxX, threadIdxY), dataH);\n\ +\n\ +}"; + +const uint32_t num2DImagesList[] = { 1, 2, 4 }; +const uint32_t widthList[] = { 4, 64, 183, 1024 }; +const uint32_t heightList[] = { 4, 64, 365 }; + +const cl_kernel getKernelType(VulkanFormat format, cl_kernel kernel_float, + cl_kernel kernel_signed, + cl_kernel kernel_unsigned) +{ + cl_kernel kernel; + switch (format) + { + case VULKAN_FORMAT_R32G32B32A32_SFLOAT: kernel = kernel_float; break; + + case VULKAN_FORMAT_R32G32B32A32_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R32G32B32A32_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R16G16B16A16_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R16G16B16A16_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R8G8B8A8_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R8G8B8A8_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R32G32_SFLOAT: kernel = kernel_float; break; + + case VULKAN_FORMAT_R32G32_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R32G32_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R16G16_UINT: kernel = kernel_unsigned; break; + + case 
VULKAN_FORMAT_R16G16_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R8G8_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R8G8_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R32_SFLOAT: kernel = kernel_float; break; + + case VULKAN_FORMAT_R32_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R32_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R16_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R16_SINT: kernel = kernel_signed; break; + + case VULKAN_FORMAT_R8_UINT: kernel = kernel_unsigned; break; + + case VULKAN_FORMAT_R8_SINT: kernel = kernel_signed; break; + + default: + log_error(" Unsupported format"); + ASSERT(0); + break; + } + return kernel; +} + +int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, + cl_command_queue &cmd_queue2, + cl_kernel *kernel_unsigned, + cl_kernel *kernel_signed, cl_kernel *kernel_float, + VulkanDevice &vkDevice) +{ + cl_int err = CL_SUCCESS; + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { 1, 1, 1 }; + + cl_kernel updateKernelCQ1, updateKernelCQ2; + std::vector vkFormatList = getSupportedVulkanFormatList(); + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + char magicValue = 0; + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + + uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT + * MAX_2D_IMAGE_ELEMENT_SIZE * 2; + VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize); + VulkanDeviceMemory vkSrcBufferDeviceMemory( + vkDevice, vkSrcBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer); + + char *srcBufferPtr, *dstBufferPtr; + 
srcBufferPtr = (char *)malloc(maxImage2DSize); + dstBufferPtr = (char *)malloc(maxImage2DSize); + + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, + VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool); + VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool); + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + + for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) + { + VulkanFormat vkFormat = vkFormatList[fIdx]; + log_info("Format: %d\n", vkFormat); + uint32_t elementSize = getVulkanFormatElementSize(vkFormat); + ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE); + log_info("elementSize= %d\n", elementSize); + std::map patternToSubstituteMap; + patternToSubstituteMap[GLSL_FORMAT_STRING] = + getVulkanFormatGLSLFormat(vkFormat); + 
patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] = + getVulkanFormatGLSLTypePrefix(vkFormat); + + VulkanShaderModule vkImage2DShaderModule( + vkDevice, + prepareVulkanShader(vkImage2DShader, patternToSubstituteMap)); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, + vkImage2DShaderModule); + + for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++) + { + uint32_t width = widthList[wIdx]; + log_info("Width: %d\n", width); + ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH); + region[0] = width; + for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++) + { + uint32_t height = heightList[hIdx]; + log_info("Height: %d", height); + ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT); + region[1] = height; + + uint32_t numMipLevels = 1; + log_info("Number of mipmap levels: %d\n", numMipLevels); + + magicValue++; + char *vkSrcBufferDeviceMemoryPtr = + (char *)vkSrcBufferDeviceMemory.map(); + uint64_t srcBufSize = 0; + memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize); + memset(srcBufferPtr, 0, maxImage2DSize); + uint32_t mipLevel = 0; + for (uint32_t row = 0; + row < std::max(height >> mipLevel, uint32_t(1)); row++) + { + for (uint32_t col = 0; + col < std::max(width >> mipLevel, uint32_t(1)); col++) + { + for (uint32_t elementByte = 0; + elementByte < elementSize; elementByte++) + { + vkSrcBufferDeviceMemoryPtr[srcBufSize] = + (char)(magicValue + mipLevel + row + col); + srcBufferPtr[srcBufSize] = + (char)(magicValue + mipLevel + row + col); + srcBufSize++; + } + } + } + srcBufSize = ROUND_UP( + srcBufSize, + std::max( + elementSize, + (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT)); + vkSrcBufferDeviceMemory.unmap(); + + for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList); + niIdx++) + { + uint32_t num2DImages = num2DImagesList[niIdx] + 1; + // added one image for cross-cq case for updateKernelCQ2 + log_info("Number of images: %d\n", num2DImages); + ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES); + uint32_t num_2D_image; + if 
(useSingleImageKernel) + { + num_2D_image = 1; + } + else + { + num_2D_image = num2DImages; + } + Params *params = (Params *)vkParamsDeviceMemory.map(); + params->numImage2DDescriptors = num_2D_image * numMipLevels; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t emhtIdx = 0; + emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType + vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type: %d \n", + vkExternalMemoryHandleType); + if ((true == disableNTHandleType) + && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT + == vkExternalMemoryHandleType)) + { + // Skip running for WIN32 NT handle. + continue; + } + VulkanImage2D vkDummyImage2D( + vkDevice, vkFormatList[0], widthList[0], + heightList[0], 1, vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyImage2D.getMemoryTypeList(); + + std::vector + vkNonDedicatedImage2DListDeviceMemory1; + std::vector + vkNonDedicatedImage2DListDeviceMemory2; + std::vector + nonDedicatedExternalMemory1; + std::vector + nonDedicatedExternalMemory2; + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); + mtIdx++) + { + const VulkanMemoryType &memoryType = + memoryTypeList[mtIdx]; + log_info("Memory type index: %d\n", + (uint32_t)memoryType); + log_info("Memory type property: %d\n", + memoryType.getMemoryTypeProperty()); + if (!useDeviceLocal) + { + if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL + == memoryType.getMemoryTypeProperty()) + { + continue; + } + } + + size_t totalImageMemSize = 0; + uint64_t interImageOffset = 0; + { + VulkanImage2D vkImage2D( + vkDevice, vkFormat, width, height, + numMipLevels, vkExternalMemoryHandleType); + ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize); + totalImageMemSize = + ROUND_UP(vkImage2D.getSize(), + vkImage2D.getAlignment()); + } + VulkanImage2DList vkNonDedicatedImage2DList( + num2DImages, vkDevice, vkFormat, width, 
height, + numMipLevels, vkExternalMemoryHandleType); + for (size_t bIdx = 0; bIdx < num2DImages; bIdx++) + { + if (non_dedicated) + { + vkNonDedicatedImage2DListDeviceMemory1 + .push_back(new VulkanDeviceMemory( + vkDevice, totalImageMemSize, + memoryType, + vkExternalMemoryHandleType)); + } + else + { + vkNonDedicatedImage2DListDeviceMemory1 + .push_back(new VulkanDeviceMemory( + vkDevice, + vkNonDedicatedImage2DList[bIdx], + memoryType, + vkExternalMemoryHandleType)); + } + vkNonDedicatedImage2DListDeviceMemory1[bIdx] + ->bindImage(vkNonDedicatedImage2DList[bIdx], + 0); + nonDedicatedExternalMemory1.push_back( + new clExternalMemoryImage( + *vkNonDedicatedImage2DListDeviceMemory1 + [bIdx], + vkExternalMemoryHandleType, context, + totalImageMemSize, width, height, 0, + vkNonDedicatedImage2DList[bIdx], + deviceId)); + } + VulkanImageViewList vkNonDedicatedImage2DViewList( + vkDevice, vkNonDedicatedImage2DList); + VulkanImage2DList vkNonDedicatedImage2DList2( + num2DImages, vkDevice, vkFormat, width, height, + numMipLevels, vkExternalMemoryHandleType); + for (size_t bIdx = 0; bIdx < num2DImages; bIdx++) + { + if (non_dedicated) + { + vkNonDedicatedImage2DListDeviceMemory2 + .push_back(new VulkanDeviceMemory( + vkDevice, totalImageMemSize, + memoryType, + vkExternalMemoryHandleType)); + } + else + { + vkNonDedicatedImage2DListDeviceMemory2 + .push_back(new VulkanDeviceMemory( + vkDevice, + vkNonDedicatedImage2DList2[bIdx], + memoryType, + vkExternalMemoryHandleType)); + } + vkNonDedicatedImage2DListDeviceMemory2[bIdx] + ->bindImage( + vkNonDedicatedImage2DList2[bIdx], 0); + nonDedicatedExternalMemory2.push_back( + new clExternalMemoryImage( + *vkNonDedicatedImage2DListDeviceMemory2 + [bIdx], + vkExternalMemoryHandleType, context, + totalImageMemSize, width, height, 0, + vkNonDedicatedImage2DList2[bIdx], + deviceId)); + } + VulkanImageViewList vkDedicatedImage2DViewList( + vkDevice, vkNonDedicatedImage2DList2); + + cl_mem external_mem_image1[5]; + cl_mem 
external_mem_image2[5]; + for (int i = 0; i < num2DImages; i++) + { + external_mem_image1[i] = + nonDedicatedExternalMemory1[i] + ->getExternalMemoryImage(); + external_mem_image2[i] = + nonDedicatedExternalMemory2[i] + ->getExternalMemoryImage(); + } + VulkanImage2DList &vkImage2DList = + vkNonDedicatedImage2DList; + VulkanImageViewList &vkImage2DViewList = + vkNonDedicatedImage2DViewList; + + clCl2VkExternalSemaphore->signal(cmd_queue1); + if (!useSingleImageKernel) + { + for (size_t i2DIdx = 0; + i2DIdx < vkImage2DList.size(); i2DIdx++) + { + for (uint32_t mipLevel = 0; + mipLevel < numMipLevels; mipLevel++) + { + uint32_t i2DvIdx = + (uint32_t)(i2DIdx * numMipLevels) + + mipLevel; + vkDescriptorSet.update( + 1 + i2DvIdx, + vkImage2DViewList[i2DvIdx]); + } + } + vkCopyCommandBuffer.begin(); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_UNDEFINED, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + for (size_t i2DIdx = 0; + i2DIdx < vkImage2DList.size(); i2DIdx++) + { + vkCopyCommandBuffer.copyBufferToImage( + vkSrcBuffer, vkImage2DList[i2DIdx], + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + } + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VULKAN_IMAGE_LAYOUT_GENERAL); + vkCopyCommandBuffer.end(); + memset(dstBufferPtr, 0, srcBufSize); + vkQueue.submit(vkCopyCommandBuffer); + vkShaderCommandBuffer.begin(); + vkShaderCommandBuffer.bindPipeline( + vkComputePipeline); + vkShaderCommandBuffer.bindDescriptorSets( + vkComputePipeline, vkPipelineLayout, + vkDescriptorSet); + vkShaderCommandBuffer.dispatch( + NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X), + NUM_BLOCKS(height, + NUM_THREADS_PER_GROUP_Y / 2), + 1); + vkShaderCommandBuffer.end(); + } + for (uint32_t iter = 0; iter < innerIterations; + iter++) + { + if (useSingleImageKernel) + { + for (size_t i2DIdx = 0; + i2DIdx < vkImage2DList.size(); + i2DIdx++) + { + vkDescriptorSet.update( + 1, vkImage2DViewList[i2DIdx]); + 
vkCopyCommandBuffer.begin(); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_UNDEFINED, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + vkCopyCommandBuffer.copyBufferToImage( + vkSrcBuffer, vkImage2DList[i2DIdx], + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VULKAN_IMAGE_LAYOUT_GENERAL); + vkCopyCommandBuffer.end(); + memset(dstBufferPtr, 0, srcBufSize); + vkQueue.submit(vkCopyCommandBuffer); + vkShaderCommandBuffer.begin(); + vkShaderCommandBuffer.bindPipeline( + vkComputePipeline); + vkShaderCommandBuffer + .bindDescriptorSets( + vkComputePipeline, + vkPipelineLayout, + vkDescriptorSet); + vkShaderCommandBuffer.dispatch( + NUM_BLOCKS(width, + NUM_THREADS_PER_GROUP_X), + NUM_BLOCKS(height, + NUM_THREADS_PER_GROUP_Y + / 2), + 1); + vkShaderCommandBuffer.end(); + if (i2DIdx < vkImage2DList.size() - 1) + { + vkQueue.submit( + vkShaderCommandBuffer); + } + } + } + vkQueue.submit(vkCl2VkSemaphore, + vkShaderCommandBuffer, + vkVk2CLSemaphore); + clVk2CLExternalSemaphore->wait(cmd_queue1); + switch (num2DImages) + { + case 2: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[0], + kernel_signed[0], + kernel_unsigned[0]); + break; + case 3: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[1], + kernel_signed[1], + kernel_unsigned[1]); + break; + case 5: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[2], + kernel_signed[2], + kernel_unsigned[2]); + break; + } + updateKernelCQ2 = getKernelType( + vkFormat, kernel_float[3], kernel_signed[3], + kernel_unsigned[3]); + // similar kernel-type based on vkFormat + int j = 0; + // Setting arguments of updateKernelCQ2 + + err = clSetKernelArg(updateKernelCQ2, 0, + sizeof(cl_mem), + &external_mem_image1[0]); + err |= clSetKernelArg(updateKernelCQ2, 1, + sizeof(cl_mem), + &external_mem_image2[0]); + err |= clSetKernelArg( + updateKernelCQ2, 2, sizeof(cl_mem), + 
&external_mem_image1[num2DImages - 1]); + err |= clSetKernelArg( + updateKernelCQ2, 3, sizeof(cl_mem), + &external_mem_image2[num2DImages - 1]); + err |= clSetKernelArg(updateKernelCQ2, 4, + sizeof(unsigned int), + &num2DImages); + err |= clSetKernelArg(updateKernelCQ2, 5, + sizeof(unsigned int), + &width); + err |= clSetKernelArg(updateKernelCQ2, 6, + sizeof(unsigned int), + &height); + err |= clSetKernelArg(updateKernelCQ2, 7, + sizeof(unsigned int), + &numMipLevels); + for (int i = 0; i < num2DImages - 1; i++, ++j) + { + err = clSetKernelArg( + updateKernelCQ1, j, sizeof(cl_mem), + &external_mem_image1[i]); + err |= clSetKernelArg( + updateKernelCQ1, ++j, sizeof(cl_mem), + &external_mem_image2[i]); + } + err |= clSetKernelArg(updateKernelCQ1, j, + sizeof(unsigned int), + &num2DImages); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &width); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &height); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &numMipLevels); + + if (err != CL_SUCCESS) + { + print_error( + err, + "Error: Failed to set arg values \n"); + goto CLEANUP; + } + // clVk2CLExternalSemaphore->wait(cmd_queue1); + size_t global_work_size[3] = { width, height, + 1 }; + cl_event first_launch; + err = clEnqueueNDRangeKernel( + cmd_queue1, updateKernelCQ1, 2, NULL, + global_work_size, NULL, 0, NULL, + &first_launch); + if (err != CL_SUCCESS) + { + goto CLEANUP; + } + err = clEnqueueNDRangeKernel( + cmd_queue2, updateKernelCQ2, 2, NULL, + global_work_size, NULL, 1, &first_launch, + NULL); + if (err != CL_SUCCESS) + { + goto CLEANUP; + } + + clFinish(cmd_queue2); + clCl2VkExternalSemaphore->signal(cmd_queue2); + } + + unsigned int flags = 0; + size_t mipmapLevelOffset = 0; + cl_event eventReadImage = NULL; + clFinish(cmd_queue2); + for (int i = 0; i < num2DImages; i++) + { + err = clEnqueueReadImage( + cmd_queue1, external_mem_image2[i], CL_TRUE, + origin, region, 0, 0, dstBufferPtr, 0, 
NULL, + &eventReadImage); + + if (err != CL_SUCCESS) + { + print_error(err, + "clEnqueueReadImage failed with" + "error\n"); + } + + if (memcmp(srcBufferPtr, dstBufferPtr, + srcBufSize)) + { + log_info("Source and destination buffers " + "don't match\n"); + if (debug_trace) + { + log_info("Source buffer contents: \n"); + for (uint64_t sIdx = 0; + sIdx < srcBufSize; sIdx++) + { + log_info( + "%d ", + (int)vkSrcBufferDeviceMemoryPtr + [sIdx]); + } + log_info("Destination buffer contents:" + "\n"); + for (uint64_t dIdx = 0; + dIdx < srcBufSize; dIdx++) + { + log_info("%d ", + (int)dstBufferPtr[dIdx]); + } + } + err = -1; + break; + } + } + for (int i = 0; i < num2DImages; i++) + { + delete vkNonDedicatedImage2DListDeviceMemory1 + [i]; + delete vkNonDedicatedImage2DListDeviceMemory2 + [i]; + delete nonDedicatedExternalMemory1[i]; + delete nonDedicatedExternalMemory2[i]; + } + vkNonDedicatedImage2DListDeviceMemory1.erase( + vkNonDedicatedImage2DListDeviceMemory1.begin(), + vkNonDedicatedImage2DListDeviceMemory1.begin() + + num2DImages); + vkNonDedicatedImage2DListDeviceMemory2.erase( + vkNonDedicatedImage2DListDeviceMemory2.begin(), + vkNonDedicatedImage2DListDeviceMemory2.begin() + + num2DImages); + nonDedicatedExternalMemory1.erase( + nonDedicatedExternalMemory1.begin(), + nonDedicatedExternalMemory1.begin() + + num2DImages); + nonDedicatedExternalMemory2.erase( + nonDedicatedExternalMemory2.begin(), + nonDedicatedExternalMemory2.begin() + + num2DImages); + if (CL_SUCCESS != err) + { + goto CLEANUP; + } + } + } + } + } + } + } +CLEANUP: + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + + if (srcBufferPtr) free(srcBufferPtr); + if (dstBufferPtr) free(dstBufferPtr); + return err; +} + +int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, + cl_kernel *kernel_unsigned, + cl_kernel *kernel_signed, cl_kernel *kernel_float, + VulkanDevice &vkDevice) +{ + cl_int err = 
CL_SUCCESS; + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { 1, 1, 1 }; + cl_kernel updateKernelCQ1; + std::vector vkFormatList = getSupportedVulkanFormatList(); + const std::vector + vkExternalMemoryHandleTypeList = + getSupportedVulkanExternalMemoryHandleTypeList(); + char magicValue = 0; + + VulkanBuffer vkParamsBuffer(vkDevice, sizeof(Params)); + VulkanDeviceMemory vkParamsDeviceMemory( + vkDevice, vkParamsBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); + + uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT + * MAX_2D_IMAGE_ELEMENT_SIZE * 2; + VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize); + VulkanDeviceMemory vkSrcBufferDeviceMemory( + vkDevice, vkSrcBuffer.getSize(), + getVulkanMemoryType(vkDevice, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); + vkSrcBufferDeviceMemory.bindBuffer(vkSrcBuffer); + + char *srcBufferPtr, *dstBufferPtr; + srcBufferPtr = (char *)malloc(maxImage2DSize); + dstBufferPtr = (char *)malloc(maxImage2DSize); + + VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( + VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1, + VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_2D_IMAGE_DESCRIPTORS); + VulkanDescriptorSetLayout vkDescriptorSetLayout( + vkDevice, vkDescriptorSetLayoutBindingList); + VulkanPipelineLayout vkPipelineLayout(vkDevice, vkDescriptorSetLayout); + + VulkanDescriptorPool vkDescriptorPool(vkDevice, + vkDescriptorSetLayoutBindingList); + VulkanDescriptorSet vkDescriptorSet(vkDevice, vkDescriptorPool, + vkDescriptorSetLayout); + + VulkanCommandPool vkCommandPool(vkDevice); + VulkanCommandBuffer vkCopyCommandBuffer(vkDevice, vkCommandPool); + VulkanCommandBuffer vkShaderCommandBuffer(vkDevice, vkCommandPool); + VulkanQueue &vkQueue = vkDevice.getQueue(); + + VulkanExternalSemaphoreHandleType vkExternalSemaphoreHandleType = + getSupportedVulkanExternalSemaphoreHandleTypeList()[0]; + 
VulkanSemaphore vkVk2CLSemaphore(vkDevice, vkExternalSemaphoreHandleType); + VulkanSemaphore vkCl2VkSemaphore(vkDevice, vkExternalSemaphoreHandleType); + clExternalSemaphore *clVk2CLExternalSemaphore = NULL; + clExternalSemaphore *clCl2VkExternalSemaphore = NULL; + + clVk2CLExternalSemaphore = new clExternalSemaphore( + vkVk2CLSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + clCl2VkExternalSemaphore = new clExternalSemaphore( + vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + + for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) + { + VulkanFormat vkFormat = vkFormatList[fIdx]; + log_info("Format: %d\n", vkFormat); + uint32_t elementSize = getVulkanFormatElementSize(vkFormat); + ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE); + log_info("elementSize= %d\n", elementSize); + std::map patternToSubstituteMap; + patternToSubstituteMap[GLSL_FORMAT_STRING] = + getVulkanFormatGLSLFormat(vkFormat); + patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] = + getVulkanFormatGLSLTypePrefix(vkFormat); + + VulkanShaderModule vkImage2DShaderModule( + vkDevice, + prepareVulkanShader(vkImage2DShader, patternToSubstituteMap)); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, + vkImage2DShaderModule); + + for (size_t wIdx = 0; wIdx < ARRAY_SIZE(widthList); wIdx++) + { + uint32_t width = widthList[wIdx]; + log_info("Width: %d\n", width); + ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH); + region[0] = width; + for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++) + { + uint32_t height = heightList[hIdx]; + log_info("Height: %d\n", height); + ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT); + region[1] = height; + + uint32_t numMipLevels = 1; + log_info("Number of mipmap levels: %d\n", numMipLevels); + + magicValue++; + char *vkSrcBufferDeviceMemoryPtr = + (char *)vkSrcBufferDeviceMemory.map(); + uint64_t srcBufSize = 0; + memset(vkSrcBufferDeviceMemoryPtr, 0, maxImage2DSize); + memset(srcBufferPtr, 0, 
maxImage2DSize); + uint32_t mipLevel = 0; + for (uint32_t row = 0; + row < std::max(height >> mipLevel, uint32_t(1)); row++) + { + for (uint32_t col = 0; + col < std::max(width >> mipLevel, uint32_t(1)); col++) + { + for (uint32_t elementByte = 0; + elementByte < elementSize; elementByte++) + { + vkSrcBufferDeviceMemoryPtr[srcBufSize] = + (char)(magicValue + mipLevel + row + col); + srcBufferPtr[srcBufSize] = + (char)(magicValue + mipLevel + row + col); + srcBufSize++; + } + } + } + srcBufSize = ROUND_UP( + srcBufSize, + std::max( + elementSize, + (uint32_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT)); + vkSrcBufferDeviceMemory.unmap(); + + for (size_t niIdx = 0; niIdx < ARRAY_SIZE(num2DImagesList); + niIdx++) + { + uint32_t num2DImages = num2DImagesList[niIdx]; + log_info("Number of images: %d\n", num2DImages); + ASSERT_LEQ(num2DImages, (uint32_t)MAX_2D_IMAGES); + + Params *params = (Params *)vkParamsDeviceMemory.map(); + uint32_t num_2D_image; + if (useSingleImageKernel) + { + num_2D_image = 1; + } + else + { + num_2D_image = num2DImages; + } + params->numImage2DDescriptors = num_2D_image * numMipLevels; + vkParamsDeviceMemory.unmap(); + vkDescriptorSet.update(0, vkParamsBuffer); + for (size_t emhtIdx = 0; + emhtIdx < vkExternalMemoryHandleTypeList.size(); + emhtIdx++) + { + VulkanExternalMemoryHandleType + vkExternalMemoryHandleType = + vkExternalMemoryHandleTypeList[emhtIdx]; + log_info("External memory handle type: %d \n", + vkExternalMemoryHandleType); + if ((true == disableNTHandleType) + && (VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT + == vkExternalMemoryHandleType)) + { + // Skip running for WIN32 NT handle. 
+ continue; + } + VulkanImage2D vkDummyImage2D( + vkDevice, vkFormatList[0], widthList[0], + heightList[0], 1, vkExternalMemoryHandleType); + const VulkanMemoryTypeList &memoryTypeList = + vkDummyImage2D.getMemoryTypeList(); + + std::vector + vkNonDedicatedImage2DListDeviceMemory1; + std::vector + vkNonDedicatedImage2DListDeviceMemory2; + std::vector + nonDedicatedExternalMemory1; + std::vector + nonDedicatedExternalMemory2; + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); + mtIdx++) + { + const VulkanMemoryType &memoryType = + memoryTypeList[mtIdx]; + log_info("Memory type index: %d\n", + (uint32_t)memoryType); + log_info("Memory type property: %d\n", + memoryType.getMemoryTypeProperty()); + if (!useDeviceLocal) + { + if (VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL + == memoryType.getMemoryTypeProperty()) + { + continue; + } + } + size_t totalImageMemSize = 0; + uint64_t interImageOffset = 0; + { + VulkanImage2D vkImage2D( + vkDevice, vkFormat, width, height, + numMipLevels, vkExternalMemoryHandleType); + ASSERT_LEQ(vkImage2D.getSize(), maxImage2DSize); + totalImageMemSize = + ROUND_UP(vkImage2D.getSize(), + vkImage2D.getAlignment()); + } + VulkanImage2DList vkNonDedicatedImage2DList( + num2DImages, vkDevice, vkFormat, width, height, + numMipLevels, vkExternalMemoryHandleType); + for (size_t bIdx = 0; + bIdx < vkNonDedicatedImage2DList.size(); + bIdx++) + { + // Create list of Vulkan device memories and + // bind the list of Vulkan images. 
+ vkNonDedicatedImage2DListDeviceMemory1 + .push_back(new VulkanDeviceMemory( + vkDevice, totalImageMemSize, memoryType, + vkExternalMemoryHandleType)); + vkNonDedicatedImage2DListDeviceMemory1[bIdx] + ->bindImage(vkNonDedicatedImage2DList[bIdx], + 0); + nonDedicatedExternalMemory1.push_back( + new clExternalMemoryImage( + *vkNonDedicatedImage2DListDeviceMemory1 + [bIdx], + vkExternalMemoryHandleType, context, + totalImageMemSize, width, height, 0, + vkNonDedicatedImage2DList[bIdx], + deviceId)); + } + VulkanImageViewList vkNonDedicatedImage2DViewList( + vkDevice, vkNonDedicatedImage2DList); + + VulkanImage2DList vkNonDedicatedImage2DList2( + num2DImages, vkDevice, vkFormat, width, height, + numMipLevels, vkExternalMemoryHandleType); + for (size_t bIdx = 0; + bIdx < vkNonDedicatedImage2DList2.size(); + bIdx++) + { + vkNonDedicatedImage2DListDeviceMemory2 + .push_back(new VulkanDeviceMemory( + vkDevice, totalImageMemSize, memoryType, + vkExternalMemoryHandleType)); + vkNonDedicatedImage2DListDeviceMemory2[bIdx] + ->bindImage( + vkNonDedicatedImage2DList2[bIdx], 0); + nonDedicatedExternalMemory2.push_back( + new clExternalMemoryImage( + *vkNonDedicatedImage2DListDeviceMemory2 + [bIdx], + vkExternalMemoryHandleType, context, + totalImageMemSize, width, height, 0, + vkNonDedicatedImage2DList2[bIdx], + deviceId)); + } + VulkanImageViewList vkDedicatedImage2DViewList( + vkDevice, vkNonDedicatedImage2DList2); + cl_mem external_mem_image1[4]; + cl_mem external_mem_image2[4]; + for (int i = 0; i < num2DImages; i++) + { + external_mem_image1[i] = + nonDedicatedExternalMemory1[i] + ->getExternalMemoryImage(); + external_mem_image2[i] = + nonDedicatedExternalMemory2[i] + ->getExternalMemoryImage(); + } + VulkanImage2DList &vkImage2DList = + vkNonDedicatedImage2DList; + VulkanImageViewList &vkImage2DViewList = + vkNonDedicatedImage2DViewList; + + clCl2VkExternalSemaphore->signal(cmd_queue1); + if (!useSingleImageKernel) + { + for (size_t i2DIdx = 0; + i2DIdx < 
vkImage2DList.size(); i2DIdx++) + { + for (uint32_t mipLevel = 0; + mipLevel < numMipLevels; mipLevel++) + { + uint32_t i2DvIdx = + (uint32_t)(i2DIdx * numMipLevels) + + mipLevel; + vkDescriptorSet.update( + 1 + i2DvIdx, + vkImage2DViewList[i2DvIdx]); + } + } + vkCopyCommandBuffer.begin(); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_UNDEFINED, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + for (size_t i2DIdx = 0; + i2DIdx < vkImage2DList.size(); i2DIdx++) + { + vkCopyCommandBuffer.copyBufferToImage( + vkSrcBuffer, vkImage2DList[i2DIdx], + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + } + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VULKAN_IMAGE_LAYOUT_GENERAL); + vkCopyCommandBuffer.end(); + memset(dstBufferPtr, 0, srcBufSize); + vkQueue.submit(vkCopyCommandBuffer); + vkShaderCommandBuffer.begin(); + vkShaderCommandBuffer.bindPipeline( + vkComputePipeline); + vkShaderCommandBuffer.bindDescriptorSets( + vkComputePipeline, vkPipelineLayout, + vkDescriptorSet); + vkShaderCommandBuffer.dispatch( + NUM_BLOCKS(width, NUM_THREADS_PER_GROUP_X), + NUM_BLOCKS(height, + NUM_THREADS_PER_GROUP_Y / 2), + 1); + vkShaderCommandBuffer.end(); + } + for (uint32_t iter = 0; iter < innerIterations; + iter++) + { + if (useSingleImageKernel) + { + for (size_t i2DIdx = 0; + i2DIdx < vkImage2DList.size(); + i2DIdx++) + { + vkDescriptorSet.update( + 1, vkImage2DViewList[i2DIdx]); + vkCopyCommandBuffer.begin(); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_UNDEFINED, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + vkCopyCommandBuffer.copyBufferToImage( + vkSrcBuffer, vkImage2DList[i2DIdx], + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vkCopyCommandBuffer.pipelineBarrier( + vkImage2DList, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VULKAN_IMAGE_LAYOUT_GENERAL); + vkCopyCommandBuffer.end(); + memset(dstBufferPtr, 0, srcBufSize); + vkQueue.submit(vkCopyCommandBuffer); + 
vkShaderCommandBuffer.begin(); + vkShaderCommandBuffer.bindPipeline( + vkComputePipeline); + vkShaderCommandBuffer + .bindDescriptorSets( + vkComputePipeline, + vkPipelineLayout, + vkDescriptorSet); + vkShaderCommandBuffer.dispatch( + NUM_BLOCKS(width, + NUM_THREADS_PER_GROUP_X), + NUM_BLOCKS(height, + NUM_THREADS_PER_GROUP_Y + / 2), + 1); + vkShaderCommandBuffer.end(); + if (i2DIdx < vkImage2DList.size() - 1) + { + vkQueue.submit( + vkShaderCommandBuffer); + } + } + } + vkQueue.submit(vkCl2VkSemaphore, + vkShaderCommandBuffer, + vkVk2CLSemaphore); + clVk2CLExternalSemaphore->wait(cmd_queue1); + switch (num2DImages) + { + case 1: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[0], + kernel_signed[0], + kernel_unsigned[0]); + break; + case 2: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[1], + kernel_signed[1], + kernel_unsigned[1]); + break; + case 4: + updateKernelCQ1 = getKernelType( + vkFormat, kernel_float[2], + kernel_signed[2], + kernel_unsigned[2]); + break; + } + int j = 0; + for (int i = 0; i < num2DImages; i++, ++j) + { + err = clSetKernelArg( + updateKernelCQ1, j, sizeof(cl_mem), + &external_mem_image1[i]); + err |= clSetKernelArg( + updateKernelCQ1, ++j, sizeof(cl_mem), + &external_mem_image2[i]); + } + err |= clSetKernelArg(updateKernelCQ1, j, + sizeof(unsigned int), + &num2DImages); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &width); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &height); + err |= clSetKernelArg(updateKernelCQ1, ++j, + sizeof(unsigned int), + &numMipLevels); + + if (err != CL_SUCCESS) + { + print_error(err, + "Error: Failed to set arg " + "values for kernel-1\n"); + goto CLEANUP; + } + + size_t global_work_size[3] = { width, height, + 1 }; + err = clEnqueueNDRangeKernel( + cmd_queue1, updateKernelCQ1, 2, NULL, + global_work_size, NULL, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + goto CLEANUP; + } + clCl2VkExternalSemaphore->signal(cmd_queue1); + } + + 
unsigned int flags = 0; + size_t mipmapLevelOffset = 0; + cl_event eventReadImage = NULL; + for (int i = 0; i < num2DImages; i++) + { + err = clEnqueueReadImage( + cmd_queue1, external_mem_image2[i], CL_TRUE, + origin, region, 0, 0, dstBufferPtr, 0, NULL, + &eventReadImage); + + if (err != CL_SUCCESS) + { + print_error(err, + "clEnqueueReadImage failed with" + "error\n"); + } + + if (memcmp(srcBufferPtr, dstBufferPtr, + srcBufSize)) + { + log_info("Source and destination buffers " + "don't match\n"); + if (debug_trace) + { + log_info("Source buffer contents: \n"); + for (uint64_t sIdx = 0; + sIdx < srcBufSize; sIdx++) + { + log_info( + "%d", + (int)vkSrcBufferDeviceMemoryPtr + [sIdx]); + } + log_info( + "Destination buffer contents:"); + for (uint64_t dIdx = 0; + dIdx < srcBufSize; dIdx++) + { + log_info("%d", + (int)dstBufferPtr[dIdx]); + } + } + err = -1; + break; + } + } + for (int i = 0; i < num2DImages; i++) + { + delete vkNonDedicatedImage2DListDeviceMemory1 + [i]; + delete vkNonDedicatedImage2DListDeviceMemory2 + [i]; + delete nonDedicatedExternalMemory1[i]; + delete nonDedicatedExternalMemory2[i]; + } + vkNonDedicatedImage2DListDeviceMemory1.erase( + vkNonDedicatedImage2DListDeviceMemory1.begin(), + vkNonDedicatedImage2DListDeviceMemory1.begin() + + num2DImages); + vkNonDedicatedImage2DListDeviceMemory2.erase( + vkNonDedicatedImage2DListDeviceMemory2.begin(), + vkNonDedicatedImage2DListDeviceMemory2.begin() + + num2DImages); + nonDedicatedExternalMemory1.erase( + nonDedicatedExternalMemory1.begin(), + nonDedicatedExternalMemory1.begin() + + num2DImages); + nonDedicatedExternalMemory2.erase( + nonDedicatedExternalMemory2.begin(), + nonDedicatedExternalMemory2.begin() + + num2DImages); + if (CL_SUCCESS != err) + { + goto CLEANUP; + } + } + } + } + } + } + } +CLEANUP: + if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; + if (clCl2VkExternalSemaphore) delete clCl2VkExternalSemaphore; + + if (srcBufferPtr) free(srcBufferPtr); + if (dstBufferPtr) 
free(dstBufferPtr); + return err; +} + +int test_image_common(cl_device_id device_, cl_context context_, + cl_command_queue queue_, int numElements_) +{ + int current_device = 0; + int device_count = 0; + int devices_prohibited = 0; + cl_int err = CL_SUCCESS; + cl_platform_id platform = NULL; + size_t extensionSize = 0; + cl_uint num_devices = 0; + cl_uint device_no = 0; + cl_device_id *devices; + char *extensions = NULL; + const char *program_source_const; + cl_command_queue cmd_queue1 = NULL; + cl_command_queue cmd_queue2 = NULL; + cl_context context = NULL; + const uint32_t num_kernels = ARRAY_SIZE(num2DImagesList) + 1; + // One kernel for Cross-CQ case + const uint32_t num_kernel_types = 3; + const char *kernel_source[num_kernels] = { kernel_text_numImage_1, + kernel_text_numImage_2, + kernel_text_numImage_4 }; + char source_1[4096]; + char source_2[4096]; + char source_3[4096]; + size_t program_source_length; + cl_program program[num_kernel_types]; + cl_kernel kernel_float[num_kernels] = { NULL, NULL, NULL, NULL }; + cl_kernel kernel_signed[num_kernels] = { NULL, NULL, NULL, NULL }; + cl_kernel kernel_unsigned[num_kernels] = { NULL, NULL, NULL, NULL }; + cl_mem external_mem_image1; + cl_mem external_mem_image2; + + VulkanDevice vkDevice; + + cl_context_properties contextProperties[] = { CL_CONTEXT_PLATFORM, 0, 0 }; + // get the platform ID + err = clGetPlatformIDs(1, &platform, NULL); + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed to get platform\n"); + goto CLEANUP; + } + + err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); + if (CL_SUCCESS != err) + { + print_error(err, "clGetDeviceIDs failed in returning no. 
of devices\n"); + goto CLEANUP; + } + devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id)); + if (NULL == devices) + { + err = CL_OUT_OF_HOST_MEMORY; + print_error(err, "Unable to allocate memory for devices\n"); + goto CLEANUP; + } + err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, + NULL); + if (CL_SUCCESS != err) + { + print_error(err, "Failed to get deviceID.\n"); + goto CLEANUP; + } + contextProperties[1] = (cl_context_properties)platform; + log_info("Assigned contextproperties for platform\n"); + for (device_no = 0; device_no < num_devices; device_no++) + { + err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, NULL, + &extensionSize); + if (CL_SUCCESS != err) + { + print_error( + err, + "Error in clGetDeviceInfo for getting device_extension size\n"); + goto CLEANUP; + } + extensions = (char *)malloc(extensionSize); + if (NULL == extensions) + { + err = CL_OUT_OF_HOST_MEMORY; + print_error(err, "Unable to allocate memory for extensions\n"); + goto CLEANUP; + } + err = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, + extensionSize, extensions, NULL); + if (CL_SUCCESS != err) + { + print_error( + err, "Error in clGetDeviceInfo for getting device_extension\n"); + goto CLEANUP; + } + err = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR, + CL_UUID_SIZE_KHR, uuid, &extensionSize); + if (CL_SUCCESS != err) + { + print_error(err, "clGetDeviceInfo failed with error"); + goto CLEANUP; + } + err = + memcmp(uuid, vkDevice.getPhysicalDevice().getUUID(), VK_UUID_SIZE); + if (err == 0) + { + break; + } + } + if (device_no >= num_devices) + { + err = EXIT_FAILURE; + print_error(err, + "OpenCL error:" + "No Vulkan-OpenCL Interop capable GPU found.\n"); + goto CLEANUP; + } + deviceId = devices[device_no]; + context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, + NULL, NULL, &err); + if (CL_SUCCESS != err) + { + print_error(err, "error creating context"); + goto CLEANUP; + } + 
log_info("Successfully created context !!!\n"); + + cmd_queue1 = clCreateCommandQueue(context, devices[device_no], 0, &err); + if (CL_SUCCESS != err) + { + err = CL_INVALID_COMMAND_QUEUE; + print_error(err, "Error: Failed to create command queue!\n"); + goto CLEANUP; + } + log_info("clCreateCommandQueue successfull \n"); + + cmd_queue2 = clCreateCommandQueue(context, devices[device_no], 0, &err); + if (CL_SUCCESS != err) + { + err = CL_INVALID_COMMAND_QUEUE; + print_error(err, "Error: Failed to create command queue!\n"); + goto CLEANUP; + } + log_info("clCreateCommandQueue2 successful \n"); + + for (int i = 0; i < num_kernels; i++) + { + switch (i) + { + case 0: + sprintf(source_1, kernel_source[i], "float4", "f", "float4", + "f", "f", "f"); + sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i", + "i", "i"); + sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4", + "ui", "ui", "ui"); + break; + case 1: + sprintf(source_1, kernel_source[i], "float4", "f", "float4", + "f", "float4", "f", "float4", "f", "f", "f", "f", "f"); + sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i", + "int4", "i", "int4", "i", "i", "i", "i", "i"); + sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4", + "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui", + "ui"); + break; + case 2: + sprintf(source_1, kernel_source[i], "float4", "f", "float4", + "f", "float4", "f", "float4", "f", "float4", "f", + "float4", "f", "float4", "f", "float4", "f", "f", "f", + "f", "f", "f", "f", "f", "f"); + sprintf(source_2, kernel_source[i], "int4", "i", "int4", "i", + "int4", "i", "int4", "i", "int4", "i", "int4", "i", + "int4", "i", "int4", "i", "i", "i", "i", "i", "i", "i", + "i", "i"); + sprintf(source_3, kernel_source[i], "uint4", "ui", "uint4", + "ui", "uint4", "ui", "uint4", "ui", "uint4", "ui", + "uint4", "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", + "ui", "ui", "ui", "ui", "ui", "ui"); + break; + case 3: + // Addtional case for creating updateKernelCQ2 which takes 
two + // images + sprintf(source_1, kernel_source[1], "float4", "f", "float4", + "f", "float4", "f", "float4", "f", "f", "f", "f", "f"); + sprintf(source_2, kernel_source[1], "int4", "i", "int4", "i", + "int4", "i", "int4", "i", "i", "i", "i", "i"); + sprintf(source_3, kernel_source[1], "uint4", "ui", "uint4", + "ui", "uint4", "ui", "uint4", "ui", "ui", "ui", "ui", + "ui"); + break; + } + const char *sourceTexts[num_kernel_types] = { source_1, source_2, + source_3 }; + for (int k = 0; k < num_kernel_types; k++) + { + program_source_length = strlen(sourceTexts[k]); + program[k] = clCreateProgramWithSource( + context, 1, &sourceTexts[k], &program_source_length, &err); + err |= clBuildProgram(program[k], 0, NULL, NULL, NULL, NULL); + } + + if (err != CL_SUCCESS) + { + print_error(err, "Error: Failed to build program"); + goto CLEANUP; + } + // create the kernel + kernel_float[i] = clCreateKernel(program[0], "image2DKernel", &err); + if (err != CL_SUCCESS) + { + print_error(err, "clCreateKernel failed"); + goto CLEANUP; + } + kernel_signed[i] = clCreateKernel(program[1], "image2DKernel", &err); + if (err != CL_SUCCESS) + { + print_error(err, "clCreateKernel failed"); + goto CLEANUP; + } + kernel_unsigned[i] = clCreateKernel(program[2], "image2DKernel", &err); + if (err != CL_SUCCESS) + { + print_error(err, "clCreateKernel failed "); + goto CLEANUP; + } + } + if (numCQ == 2) + { + err = run_test_with_two_queue(context, cmd_queue1, cmd_queue2, + kernel_unsigned, kernel_signed, + kernel_float, vkDevice); + } + else + { + err = run_test_with_one_queue(context, cmd_queue1, kernel_unsigned, + kernel_signed, kernel_float, vkDevice); + } +CLEANUP: + for (int i = 0; i < num_kernels; i++) + { + if (kernel_float[i]) + { + clReleaseKernel(kernel_float[i]); + } + if (kernel_unsigned[i]) + { + clReleaseKernel(kernel_unsigned[i]); + } + if (kernel_signed[i]) + { + clReleaseKernel(kernel_signed[i]); + } + } + for (int i = 0; i < num_kernel_types; i++) + { + if (program[i]) + { + 
clReleaseProgram(program[i]); + } + } + if (cmd_queue1) clReleaseCommandQueue(cmd_queue1); + if (cmd_queue2) clReleaseCommandQueue(cmd_queue2); + if (context) clReleaseContext(context); + + if (extensions) free(extensions); + if (devices) free(devices); + + return err; +} diff --git a/test_conformance/vulkan/test_vulkan_platform_device_info.cpp b/test_conformance/vulkan/test_vulkan_platform_device_info.cpp new file mode 100644 index 00000000..12f373b5 --- /dev/null +++ b/test_conformance/vulkan/test_vulkan_platform_device_info.cpp @@ -0,0 +1,146 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include +#include +#include "harness/testHarness.h" +#include +#include + +typedef struct +{ + cl_uint info; + const char *name; +} _info; + +_info platform_info_table[] = { +#define STRING(x) \ + { \ + x, #x \ + } + STRING(CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR), + STRING(CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), + STRING(CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR) +#undef STRING +}; + +_info device_info_table[] = { +#define STRING(x) \ + { \ + x, #x \ + } + STRING(CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR), + STRING(CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR), + STRING(CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR) +#undef STRING +}; + +int test_platform_info(cl_device_id deviceID, cl_context _context, + cl_command_queue _queue, int num_elements) +{ + cl_uint num_platforms; + cl_uint i, j; + cl_platform_id *platforms; + cl_int errNum; + cl_uint *handle_type; + size_t handle_type_size = 0; + cl_uint num_handles = 0; + + // get total # of platforms + errNum = clGetPlatformIDs(0, NULL, &num_platforms); + test_error(errNum, "clGetPlatformIDs (getting count) failed"); + + platforms = + (cl_platform_id *)malloc(num_platforms * sizeof(cl_platform_id)); + if (!platforms) + { + printf("error allocating memory\n"); + exit(1); + } + log_info("%d platforms available\n", num_platforms); + errNum = clGetPlatformIDs(num_platforms, platforms, NULL); + test_error(errNum, "clGetPlatformIDs (getting IDs) failed"); + + for (i = 0; i < num_platforms; i++) + { + log_info("Platform%d (id %lu) info:\n", i, (unsigned long)platforms[i]); + for (j = 0; + j < sizeof(platform_info_table) / sizeof(platform_info_table[0]); + j++) + { + errNum = + clGetPlatformInfo(platforms[i], platform_info_table[j].info, 0, + NULL, &handle_type_size); + test_error(errNum, "clGetPlatformInfo failed"); + num_handles = handle_type_size / sizeof(cl_uint); + handle_type = (cl_uint *)malloc(handle_type_size); + errNum = + clGetPlatformInfo(platforms[i], 
platform_info_table[j].info, + handle_type_size, handle_type, NULL); + test_error(errNum, "clGetPlatformInfo failed"); + + log_info("%s: \n", platform_info_table[j].name); + while (num_handles--) + { + log_info("%x \n", handle_type[num_handles]); + } + if (handle_type) + { + free(handle_type); + } + } + } + if (platforms) + { + free(platforms); + } + return TEST_PASS; +} + +int test_device_info(cl_device_id deviceID, cl_context _context, + cl_command_queue _queue, int num_elements) +{ + cl_uint j; + cl_uint *handle_type; + size_t handle_type_size = 0; + cl_uint num_handles = 0; + cl_int errNum = CL_SUCCESS; + for (j = 0; j < sizeof(device_info_table) / sizeof(device_info_table[0]); + j++) + { + errNum = clGetDeviceInfo(deviceID, device_info_table[j].info, 0, NULL, + &handle_type_size); + test_error(errNum, "clGetDeviceInfo failed"); + + num_handles = handle_type_size / sizeof(cl_uint); + handle_type = (cl_uint *)malloc(handle_type_size); + + errNum = clGetDeviceInfo(deviceID, device_info_table[j].info, + handle_type_size, handle_type, NULL); + test_error(errNum, "clGetDeviceInfo failed"); + + log_info("%s: \n", device_info_table[j].name); + while (num_handles--) + { + log_info("%x \n", handle_type[num_handles]); + } + if (handle_type) + { + free(handle_type); + } + } + return TEST_PASS; +} diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp new file mode 100644 index 00000000..136818f6 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp @@ -0,0 +1,818 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include +#include "opencl_vulkan_wrapper.hpp" +#include "vulkan_wrapper.hpp" +#include "harness/errorHelpers.h" +#include "harness/deviceInfo.h" +#include +#include +#include + +#define ASSERT(x) assert((x)) + +pfnclCreateSemaphoreWithPropertiesKHR clCreateSemaphoreWithPropertiesKHRptr; +pfnclEnqueueWaitSemaphoresKHR clEnqueueWaitSemaphoresKHRptr; +pfnclEnqueueSignalSemaphoresKHR clEnqueueSignalSemaphoresKHRptr; +pfnclEnqueueAcquireExternalMemObjectsKHR + clEnqueueAcquireExternalMemObjectsKHRptr; +pfnclEnqueueReleaseExternalMemObjectsKHR + clEnqueueReleaseExternalMemObjectsKHRptr; +pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr; + +void init_cl_vk_ext(cl_platform_id opencl_platform) +{ + clEnqueueWaitSemaphoresKHRptr = + (pfnclEnqueueWaitSemaphoresKHR)clGetExtensionFunctionAddressForPlatform( + opencl_platform, "clEnqueueWaitSemaphoresKHR"); + if (NULL == clEnqueueWaitSemaphoresKHRptr) + { + throw std::runtime_error("Failed to get the function pointer of " + "clEnqueueWaitSemaphoresKHRptr!"); + } + clEnqueueSignalSemaphoresKHRptr = (pfnclEnqueueSignalSemaphoresKHR) + clGetExtensionFunctionAddressForPlatform( + opencl_platform, "clEnqueueSignalSemaphoresKHR"); + if (NULL == clEnqueueSignalSemaphoresKHRptr) + { + throw std::runtime_error("Failed to get the function pointer of " + "clEnqueueSignalSemaphoresKHRptr!"); + } + clReleaseSemaphoreObjectKHRptr = (pfnclReleaseSemaphoreObjectKHR) + clGetExtensionFunctionAddressForPlatform(opencl_platform, + "clReleaseSemaphoreObjectKHR"); + if (NULL == clReleaseSemaphoreObjectKHRptr) + { + 
throw std::runtime_error("Failed to get the function pointer of " + "clReleaseSemaphoreObjectKHRptr!"); + } + clCreateSemaphoreWithPropertiesKHRptr = + (pfnclCreateSemaphoreWithPropertiesKHR) + clGetExtensionFunctionAddressForPlatform( + opencl_platform, "clCreateSemaphoreWithPropertiesKHR"); + if (NULL == clCreateSemaphoreWithPropertiesKHRptr) + { + throw std::runtime_error("Failed to get the function pointer of " + "clCreateSemaphoreWithPropertiesKHRptr!"); + } +} + +cl_int getCLFormatFromVkFormat(VkFormat vkFormat, + cl_image_format *clImageFormat) +{ + cl_int result = CL_SUCCESS; + switch (vkFormat) + { + case VK_FORMAT_R8G8B8A8_UNORM: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNORM_INT8; + break; + case VK_FORMAT_B8G8R8A8_UNORM: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_UNORM_INT8; + break; + case VK_FORMAT_R16G16B16A16_UNORM: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNORM_INT16; + break; + case VK_FORMAT_R8G8B8A8_SINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_SIGNED_INT8; + break; + case VK_FORMAT_R16G16B16A16_SINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_SIGNED_INT16; + break; + case VK_FORMAT_R32G32B32A32_SINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_SIGNED_INT32; + break; + case VK_FORMAT_R8G8B8A8_UINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8; + break; + case VK_FORMAT_R16G16B16A16_UINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT16; + break; + case VK_FORMAT_R32G32B32A32_UINT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + break; + case 
VK_FORMAT_R16G16B16A16_SFLOAT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_HALF_FLOAT; + break; + case VK_FORMAT_R32G32B32A32_SFLOAT: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_FLOAT; + break; + case VK_FORMAT_R8_SNORM: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_SNORM_INT8; + break; + case VK_FORMAT_R16_SNORM: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_SNORM_INT16; + break; + case VK_FORMAT_R8_UNORM: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_UNORM_INT8; + break; + case VK_FORMAT_R16_UNORM: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_UNORM_INT16; + break; + case VK_FORMAT_R8_SINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_SIGNED_INT8; + break; + case VK_FORMAT_R16_SINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_SIGNED_INT16; + break; + case VK_FORMAT_R32_SINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_SIGNED_INT32; + break; + case VK_FORMAT_R8_UINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8; + break; + case VK_FORMAT_R16_UINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT16; + break; + case VK_FORMAT_R32_UINT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + break; + case VK_FORMAT_R16_SFLOAT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_HALF_FLOAT; + break; + case VK_FORMAT_R32_SFLOAT: + clImageFormat->image_channel_order = CL_R; + clImageFormat->image_channel_data_type = CL_FLOAT; + break; + case 
VK_FORMAT_R8G8_SNORM: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_SNORM_INT8; + break; + case VK_FORMAT_R16G16_SNORM: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_SNORM_INT16; + break; + case VK_FORMAT_R8G8_UNORM: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_UNORM_INT8; + break; + case VK_FORMAT_R16G16_UNORM: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_UNORM_INT16; + break; + case VK_FORMAT_R8G8_SINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_SIGNED_INT8; + break; + case VK_FORMAT_R16G16_SINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_SIGNED_INT16; + break; + case VK_FORMAT_R32G32_SINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_SIGNED_INT32; + break; + case VK_FORMAT_R8G8_UINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8; + break; + case VK_FORMAT_R16G16_UINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT16; + break; + case VK_FORMAT_R32G32_UINT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT32; + break; + case VK_FORMAT_R16G16_SFLOAT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_HALF_FLOAT; + break; + case VK_FORMAT_R32G32_SFLOAT: + clImageFormat->image_channel_order = CL_RG; + clImageFormat->image_channel_data_type = CL_FLOAT; + break; + case VK_FORMAT_R5G6B5_UNORM_PACK16: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_UNORM_SHORT_565; + break; + case VK_FORMAT_R5G5B5A1_UNORM_PACK16: + clImageFormat->image_channel_order = CL_RGBA; + 
clImageFormat->image_channel_data_type = CL_UNORM_SHORT_555; + break; + case VK_FORMAT_R8G8B8A8_SNORM: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_SNORM_INT8; + break; + case VK_FORMAT_R16G16B16A16_SNORM: + clImageFormat->image_channel_order = CL_RGBA; + clImageFormat->image_channel_data_type = CL_SNORM_INT16; + break; + case VK_FORMAT_B8G8R8A8_SNORM: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_SNORM_INT8; + break; + case VK_FORMAT_B5G6R5_UNORM_PACK16: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_UNORM_SHORT_565; + break; + case VK_FORMAT_B5G5R5A1_UNORM_PACK16: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_UNORM_SHORT_555; + break; + case VK_FORMAT_B8G8R8A8_SINT: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_SIGNED_INT8; + break; + case VK_FORMAT_B8G8R8A8_UINT: + clImageFormat->image_channel_order = CL_BGRA; + clImageFormat->image_channel_data_type = CL_UNSIGNED_INT8; + break; + case VK_FORMAT_A8B8G8R8_SNORM_PACK32: result = CL_INVALID_VALUE; break; + case VK_FORMAT_A8B8G8R8_UNORM_PACK32: result = CL_INVALID_VALUE; break; + case VK_FORMAT_A8B8G8R8_SINT_PACK32: result = CL_INVALID_VALUE; break; + case VK_FORMAT_A8B8G8R8_UINT_PACK32: result = CL_INVALID_VALUE; break; + default: + log_error("Unsupported format\n"); + ASSERT(0); + break; + } + return result; +} + +cl_mem_object_type getImageTypeFromVk(VkImageType imageType) +{ + cl_mem_object_type cl_image_type = CL_INVALID_VALUE; + switch (imageType) + { + case VK_IMAGE_TYPE_1D: cl_image_type = CL_MEM_OBJECT_IMAGE1D; break; + case VK_IMAGE_TYPE_2D: cl_image_type = CL_MEM_OBJECT_IMAGE2D; break; + case VK_IMAGE_TYPE_3D: cl_image_type = CL_MEM_OBJECT_IMAGE3D; break; + default: break; + } + return cl_image_type; +} + +size_t GetElementNBytes(const cl_image_format *format) +{ + 
size_t result; + + switch (format->image_channel_order) + { + case CL_R: + case CL_A: + case CL_INTENSITY: + case CL_LUMINANCE: + case CL_DEPTH: result = 1; break; + case CL_RG: + case CL_RA: result = 2; break; + case CL_RGB: result = 3; break; + case CL_RGBA: + case CL_ARGB: + case CL_BGRA: + case CL_sRGBA: result = 4; break; + default: result = 0; break; + } + + switch (format->image_channel_data_type) + { + case CL_SNORM_INT8: + case CL_UNORM_INT8: + case CL_SIGNED_INT8: + case CL_UNSIGNED_INT8: + // result *= 1; + break; + + case CL_SNORM_INT16: + case CL_UNORM_INT16: + case CL_SIGNED_INT16: + case CL_UNSIGNED_INT16: + case CL_HALF_FLOAT: result *= 2; break; + + case CL_SIGNED_INT32: + case CL_UNSIGNED_INT32: + case CL_FLOAT: result *= 4; break; + + case CL_UNORM_SHORT_565: + case CL_UNORM_SHORT_555: + if (result == 3) + { + result = 2; + } + else + { + result = 0; + } + break; + + case CL_UNORM_INT_101010: + if (result == 3) + { + result = 4; + } + else + { + result = 0; + } + break; + + default: result = 0; break; + } + + return result; +} + +cl_int get2DImageDimensions(const VkImageCreateInfo *VulkanImageCreateInfo, + cl_image_format *img_fmt, size_t totalImageSize, + size_t &width, size_t &height) +{ + cl_int result = CL_SUCCESS; + if (totalImageSize == 0) + { + result = CL_INVALID_VALUE; + } + size_t element_size = GetElementNBytes(img_fmt); + size_t row_pitch = element_size * VulkanImageCreateInfo->extent.width; + row_pitch = row_pitch % 64 == 0 ? 
row_pitch : ((row_pitch / 64) + 1) * 64; + + width = row_pitch / element_size; + height = totalImageSize / row_pitch; + + return result; +} + +cl_int +getCLImageInfoFromVkImageInfo(const VkImageCreateInfo *VulkanImageCreateInfo, + size_t totalImageSize, cl_image_format *img_fmt, + cl_image_desc *img_desc) +{ + cl_int result = CL_SUCCESS; + + cl_image_format clImgFormat = { 0 }; + result = + getCLFormatFromVkFormat(VulkanImageCreateInfo->format, &clImgFormat); + if (CL_SUCCESS != result) + { + return result; + } + memcpy(img_fmt, &clImgFormat, sizeof(cl_image_format)); + + img_desc->image_type = getImageTypeFromVk(VulkanImageCreateInfo->imageType); + if (CL_INVALID_VALUE == img_desc->image_type) + { + return CL_INVALID_VALUE; + } + + result = + get2DImageDimensions(VulkanImageCreateInfo, img_fmt, totalImageSize, + img_desc->image_width, img_desc->image_height); + if (CL_SUCCESS != result) + { + throw std::runtime_error("get2DImageDimensions failed!!!"); + } + + img_desc->image_depth = 0; // VulkanImageCreateInfo->extent.depth; + img_desc->image_array_size = 0; + img_desc->image_row_pitch = 0; // Row pitch set to zero as host_ptr is NULL + img_desc->image_slice_pitch = + img_desc->image_row_pitch * img_desc->image_height; + img_desc->num_mip_levels = 1; + img_desc->num_samples = 0; + img_desc->buffer = NULL; + + return result; +} + +cl_int check_external_memory_handle_type( + cl_device_id deviceID, + cl_external_memory_handle_type_khr requiredHandleType) +{ + unsigned int i; + cl_external_memory_handle_type_khr *handle_type; + size_t handle_type_size = 0; + + cl_int errNum = CL_SUCCESS; + + errNum = clGetDeviceInfo(deviceID, + CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, + 0, NULL, &handle_type_size); + handle_type = + (cl_external_memory_handle_type_khr *)malloc(handle_type_size); + + errNum = clGetDeviceInfo(deviceID, + CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, + handle_type_size, handle_type, NULL); + + test_error( + errNum, + "Unable to query 
CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR \n"); + + for (i = 0; i < handle_type_size; i++) + { + if (requiredHandleType == handle_type[i]) + { + return CL_SUCCESS; + } + } + log_error("cl_khr_external_memory extension is missing support for %d\n", + requiredHandleType); + + return CL_INVALID_VALUE; +} + +cl_int check_external_semaphore_handle_type( + cl_device_id deviceID, + cl_external_semaphore_handle_type_khr requiredHandleType) +{ + unsigned int i; + cl_external_semaphore_handle_type_khr *handle_type; + size_t handle_type_size = 0; + cl_int errNum = CL_SUCCESS; + + errNum = + clGetDeviceInfo(deviceID, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, + 0, NULL, &handle_type_size); + handle_type = + (cl_external_semaphore_handle_type_khr *)malloc(handle_type_size); + + errNum = + clGetDeviceInfo(deviceID, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, + handle_type_size, handle_type, NULL); + + test_error( + errNum, + "Unable to query CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR \n"); + + for (i = 0; i < handle_type_size; i++) + { + if (requiredHandleType == handle_type[i]) + { + return CL_SUCCESS; + } + } + log_error("cl_khr_external_semaphore extension is missing support for %d\n", + requiredHandleType); + + return CL_INVALID_VALUE; +} +clExternalMemory::clExternalMemory() {} + +clExternalMemory::clExternalMemory(const clExternalMemory &externalMemory) + : m_externalMemory(externalMemory.m_externalMemory) +{} + +clExternalMemory::clExternalMemory( + const VulkanDeviceMemory *deviceMemory, + VulkanExternalMemoryHandleType externalMemoryHandleType, uint64_t offset, + uint64_t size, cl_context context, cl_device_id deviceId) +{ + int err = 0; + m_externalMemory = NULL; + cl_device_id devList[] = { deviceId, NULL }; + std::vector extMemProperties; +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_memory_win32")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_memory_win32 extension\n"); + } +#else + if 
(!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_memory_opaque_fd " + "extension \n"); + } +#endif + + switch (externalMemoryHandleType) + { + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: +#ifdef _WIN32 + ASSERT(0); +#endif + log_info("Opaque file descriptors are not supported on Windows\n"); + fd = (int)deviceMemory->getHandle(externalMemoryHandleType); + err = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties.push_back((cl_mem_properties)fd); + break; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: +#ifndef _WIN32 + ASSERT(0); +#else + log_info(" Opaque NT handles are only supported on Windows\n"); + handle = deviceMemory->getHandle(externalMemoryHandleType); + err = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties.push_back((cl_mem_properties)handle); +#endif + break; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: +#ifndef _WIN32 + ASSERT(0); +#else + log_info("Opaque D3DKMT handles are only supported on Windows\n"); + handle = deviceMemory->getHandle(externalMemoryHandleType); + err = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties.push_back( + (cl_mem_properties) + CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties.push_back((cl_mem_properties)handle); +#endif + break; + default: + ASSERT(0); + log_error("Unsupported external memory handle type\n"); + break; + } + if (CL_SUCCESS != err) + { + throw std::runtime_error("Unsupported external memory type\n "); + } + + extMemProperties.push_back((cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR); + 
extMemProperties.push_back((cl_mem_properties)devList[0]); + extMemProperties.push_back( + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR); + extMemProperties.push_back(0); + + m_externalMemory = clCreateBufferWithProperties( + context, extMemProperties.data(), 1, size, NULL, &err); + if (CL_SUCCESS != err) + { + log_error("clCreateBufferWithProperties failed with %d\n", err); + throw std::runtime_error("clCreateBufferWithProperties failed "); + } +} +clExternalMemoryImage::clExternalMemoryImage( + const VulkanDeviceMemory &deviceMemory, + VulkanExternalMemoryHandleType externalMemoryHandleType, cl_context context, + size_t totalImageMemSize, size_t imageWidth, size_t imageHeight, + size_t totalSize, const VulkanImage2D &image2D, cl_device_id deviceId) +{ + cl_int errcode_ret = 0; + std::vector extMemProperties1; + cl_device_id devList[] = { deviceId, NULL }; + +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_memory_win32")) + { + throw std::runtime_error("Device does not support " + "cl_khr_external_memory_win32 extension \n"); + } +#elif !defined(__APPLE__) + if (!is_extension_available(devList[0], "cl_khr_external_memory_opaque_fd")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_memory_opaque_fd " + "extension\n"); + } +#endif + + switch (externalMemoryHandleType) + { +#ifdef _WIN32 + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: + log_info("Opaque NT handles are only supported on Windows\n"); + handle = deviceMemory.getHandle(externalMemoryHandleType); + errcode_ret = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties1.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + extMemProperties1.push_back((cl_mem_properties)handle); + break; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: + log_info("Opaque D3DKMT handles are only supported on Windows\n"); + handle = 
deviceMemory.getHandle(externalMemoryHandleType); + errcode_ret = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties1.push_back( + (cl_mem_properties) + CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + extMemProperties1.push_back((cl_mem_properties)handle); + break; +#elif !defined(__APPLE__) + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: + log_info(" Opaque file descriptors are not supported on Windows\n"); + fd = (int)deviceMemory.getHandle(externalMemoryHandleType); + errcode_ret = check_external_memory_handle_type( + devList[0], CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties1.push_back( + (cl_mem_properties)CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + extMemProperties1.push_back((cl_mem_properties)fd); + break; +#endif + default: + ASSERT(0); + log_error("Unsupported external memory handle type\n"); + break; + } + if (CL_SUCCESS != errcode_ret) + { + throw std::runtime_error("Unsupported external memory type\n "); + } + // Set cl_image_desc + size_t clImageFormatSize; + cl_image_desc image_desc; + memset(&image_desc, 0x0, sizeof(cl_image_desc)); + cl_image_format img_format = { 0 }; + const VkImageCreateInfo VulkanImageCreateInfo = + image2D.getVkImageCreateInfo(); + + errcode_ret = getCLImageInfoFromVkImageInfo( + &VulkanImageCreateInfo, image2D.getSize(), &img_format, &image_desc); + if (CL_SUCCESS != errcode_ret) + { + throw std::runtime_error("getCLImageInfoFromVkImageInfo failed!!!"); + } + + extMemProperties1.push_back((cl_mem_properties)CL_DEVICE_HANDLE_LIST_KHR); + extMemProperties1.push_back((cl_mem_properties)devList[0]); + extMemProperties1.push_back( + (cl_mem_properties)CL_DEVICE_HANDLE_LIST_END_KHR); + extMemProperties1.push_back(0); + m_externalMemory = clCreateImageWithProperties( + context, extMemProperties1.data(), CL_MEM_READ_WRITE, &img_format, + &image_desc, NULL, &errcode_ret); + if (CL_SUCCESS != errcode_ret) + { + throw 
std::runtime_error("clCreateImageWithProperties failed!!!"); + } +} + +cl_mem clExternalMemory::getExternalMemoryBuffer() { return m_externalMemory; } + +cl_mem clExternalMemoryImage::getExternalMemoryImage() +{ + return m_externalMemory; +} + +clExternalMemoryImage::~clExternalMemoryImage() +{ + clReleaseMemObject(m_externalMemory); +} + +clExternalMemory::~clExternalMemory() { clReleaseMemObject(m_externalMemory); } + +clExternalMemoryImage::clExternalMemoryImage() {} + + +////////////////////////////////////////// +// clExternalSemaphore implementation // +////////////////////////////////////////// + +clExternalSemaphore::clExternalSemaphore( + const clExternalSemaphore &externalSemaphore) + : m_externalSemaphore(externalSemaphore.m_externalSemaphore) +{} + +clExternalSemaphore::clExternalSemaphore( + const VulkanSemaphore &semaphore, cl_context context, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType, + cl_device_id deviceId) +{ + + cl_int err = 0; + cl_device_id devList[] = { deviceId, NULL }; + +#ifdef _WIN32 + if (!is_extension_available(devList[0], "cl_khr_external_semaphore_win32")) + { + throw std::runtime_error("Device does not support " + "cl_khr_external_semaphore_win32 extension\n"); + } +#elif !defined(__APPLE__) + if (!is_extension_available(devList[0], + "cl_khr_external_semaphore_opaque_fd")) + { + throw std::runtime_error( + "Device does not support cl_khr_external_semaphore_opaque_fd " + "extension \n"); + } +#endif + + std::vector sema_props{ + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_KHR, + (cl_semaphore_properties_khr)CL_SEMAPHORE_TYPE_BINARY_KHR, + }; + switch (externalSemaphoreHandleType) + { + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: +#ifdef _WIN32 + ASSERT(0); +#else + log_info(" Opaque file descriptors are not supported on Windows\n"); + fd = (int)semaphore.getHandle(externalSemaphoreHandleType); + err = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR); + 
sema_props.push_back( + (cl_semaphore_properties_khr)CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR); + sema_props.push_back((cl_semaphore_properties_khr)fd); +#endif + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: +#ifndef _WIN32 + ASSERT(0); +#else + log_info(" Opaque NT handles are only supported on Windows\n"); + handle = semaphore.getName().size() + ? NULL + : semaphore.getHandle(externalSemaphoreHandleType); + err = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR); + sema_props.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR); + sema_props.push_back((cl_semaphore_properties_khr)handle); +#endif + break; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: +#ifndef _WIN32 + ASSERT(0); +#else + log_info(" Opaque D3DKMT handles are only supported on Windows\n"); + handle = semaphore.getHandle(externalSemaphoreHandleType); + err = check_external_semaphore_handle_type( + devList[0], CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR); + sema_props.push_back((cl_semaphore_properties_khr) + CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR); + sema_props.push_back((cl_semaphore_properties_khr)handle); +#endif + break; + default: + ASSERT(0); + log_error("Unsupported external memory handle type\n"); + break; + } + if (CL_SUCCESS != err) + { + throw std::runtime_error( + "Unsupported external sempahore handle type\n "); + } + + sema_props.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_KHR); + sema_props.push_back((cl_semaphore_properties_khr)devList[0]); + sema_props.push_back( + (cl_semaphore_properties_khr)CL_DEVICE_HANDLE_LIST_END_KHR); + sema_props.push_back(0); + m_externalSemaphore = + clCreateSemaphoreWithPropertiesKHRptr(context, sema_props.data(), &err); + if (CL_SUCCESS != err) + { + log_error("clCreateSemaphoreWithPropertiesKHRptr failed with %d\n", + err); + throw std::runtime_error( + "clCreateSemaphoreWithPropertiesKHRptr failed! 
"); + } +} + +clExternalSemaphore::~clExternalSemaphore() +{ + cl_int err = clReleaseSemaphoreObjectKHRptr(m_externalSemaphore); + if (err != CL_SUCCESS) + { + throw std::runtime_error("clReleaseSemaphoreObjectKHR failed!"); + } +} + +void clExternalSemaphore::signal(cl_command_queue cmd_queue) +{ + clEnqueueSignalSemaphoresKHRptr(cmd_queue, 1, &m_externalSemaphore, NULL, 0, + NULL, NULL); +} + +void clExternalSemaphore::wait(cl_command_queue cmd_queue) +{ + clEnqueueWaitSemaphoresKHRptr(cmd_queue, 1, &m_externalSemaphore, NULL, 0, + NULL, NULL); +} diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp new file mode 100644 index 00000000..c1d2a766 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp @@ -0,0 +1,129 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _opencl_vulkan_wrapper_hpp_ +#define _opencl_vulkan_wrapper_hpp_ + +#include "vulkan_wrapper.hpp" + +#if !defined(__APPLE__) +#include +#include +#else +#include +#include +#endif + +typedef cl_semaphore_khr (*pfnclCreateSemaphoreWithPropertiesKHR)( + cl_context context, cl_semaphore_properties_khr *sema_props, + cl_int *errcode_ret); +typedef cl_int (*pfnclEnqueueWaitSemaphoresKHR)( + cl_command_queue command_queue, cl_uint num_semaphores, + const cl_semaphore_khr *sema_list, + const cl_semaphore_payload_khr *sema_payload_list, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event); +typedef cl_int (*pfnclEnqueueSignalSemaphoresKHR)( + cl_command_queue command_queue, cl_uint num_semaphores, + const cl_semaphore_khr *sema_list, + const cl_semaphore_payload_khr *sema_payload_list, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_event *event); +typedef cl_int (*pfnclEnqueueAcquireExternalMemObjectsKHR)( + cl_command_queue command_queue, cl_uint num_mem_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); +typedef cl_int (*pfnclEnqueueReleaseExternalMemObjectsKHR)( + cl_command_queue command_queue, cl_uint num_mem_objects, + const cl_mem *mem_objects, cl_uint num_events_in_wait_list, + const cl_event *event_wait_list, cl_event *event); +typedef cl_int (*pfnclReleaseSemaphoreObjectKHR)(cl_semaphore_khr sema_object); + +extern pfnclCreateSemaphoreWithPropertiesKHR + clCreateSemaphoreWithPropertiesKHRptr; +extern pfnclEnqueueWaitSemaphoresKHR clEnqueueWaitSemaphoresKHRptr; +extern pfnclEnqueueSignalSemaphoresKHR clEnqueueSignalSemaphoresKHRptr; +extern pfnclEnqueueAcquireExternalMemObjectsKHR + clEnqueueAcquireExternalMemObjectsKHRptr; +extern pfnclEnqueueReleaseExternalMemObjectsKHR + clEnqueueReleaseExternalMemObjectsKHRptr; +extern pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr; + +cl_int 
getCLImageInfoFromVkImageInfo(const VkImageCreateInfo *, size_t, + cl_image_format *, cl_image_desc *); +cl_int check_external_memory_handle_type( + cl_device_id deviceID, + cl_external_memory_handle_type_khr requiredHandleType); +cl_int check_external_semaphore_handle_type( + cl_device_id deviceID, + cl_external_semaphore_handle_type_khr requiredHandleType); + +class clExternalMemory { +protected: + cl_mem m_externalMemory; + int fd; + void *handle; + clExternalMemory(const clExternalMemory &externalMemory); + +public: + clExternalMemory(); + clExternalMemory(const VulkanDeviceMemory *deviceMemory, + VulkanExternalMemoryHandleType externalMemoryHandleType, + uint64_t offset, uint64_t size, cl_context context, + cl_device_id deviceId); + + virtual ~clExternalMemory(); + cl_mem getExternalMemoryBuffer(); +}; +class clExternalMemoryImage { +protected: + cl_mem m_externalMemory; + int fd; + void *handle; + cl_command_queue cmd_queue; + clExternalMemoryImage(); + +public: + clExternalMemoryImage( + const VulkanDeviceMemory &deviceMemory, + VulkanExternalMemoryHandleType externalMemoryHandleType, + cl_context context, size_t totalImageMemSize, size_t imageWidth, + size_t imageHeight, size_t totalSize, const VulkanImage2D &image2D, + cl_device_id deviceId); + virtual ~clExternalMemoryImage(); + cl_mem getExternalMemoryImage(); +}; + +class clExternalSemaphore { +protected: + cl_semaphore_khr m_externalSemaphore; + int fd; + void *handle; + clExternalSemaphore(const clExternalSemaphore &externalSemaphore); + +public: + clExternalSemaphore( + const VulkanSemaphore &deviceSemaphore, cl_context context, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType, + cl_device_id deviceId); + virtual ~clExternalSemaphore(); + void signal(cl_command_queue command_queue); + void wait(cl_command_queue command_queue); + // operator openclExternalSemaphore_t() const; +}; + +extern void init_cl_vk_ext(cl_platform_id); + +#endif // _opencl_vulkan_wrapper_hpp_ diff --git 
//
// Copyright (c) 2022 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#ifndef _vulkan_api_list_hpp_
#define _vulkan_api_list_hpp_

// X-macro list of every Vulkan entry point the interop tests resolve at
// runtime.  Expand by defining VK_FUNC_DECL(name) before using the list.
#define VK_FUNC_LIST \
    VK_FUNC_DECL(vkEnumerateInstanceVersion) \
    VK_FUNC_DECL(vkEnumerateInstanceExtensionProperties) \
    VK_FUNC_DECL(vkEnumerateInstanceLayerProperties) \
    VK_FUNC_DECL(vkCreateInstance) \
    VK_FUNC_DECL(vkGetInstanceProcAddr) \
    VK_FUNC_DECL(vkGetDeviceProcAddr) \
    VK_FUNC_DECL(vkEnumeratePhysicalDevices) \
    VK_FUNC_DECL(vkGetPhysicalDeviceProperties) \
    VK_FUNC_DECL(vkCreateDevice) \
    VK_FUNC_DECL(vkDestroyDevice) \
    VK_FUNC_DECL(vkGetDeviceQueue) \
    VK_FUNC_DECL(vkQueueWaitIdle) \
    VK_FUNC_DECL(vkCreateDescriptorSetLayout) \
    VK_FUNC_DECL(vkCreatePipelineLayout) \
    VK_FUNC_DECL(vkCreateShaderModule) \
    VK_FUNC_DECL(vkCreateComputePipelines) \
    VK_FUNC_DECL(vkCreateDescriptorPool) \
    VK_FUNC_DECL(vkAllocateDescriptorSets) \
    VK_FUNC_DECL(vkFreeDescriptorSets) \
    VK_FUNC_DECL(vkAllocateCommandBuffers) \
    VK_FUNC_DECL(vkBeginCommandBuffer) \
    VK_FUNC_DECL(vkCmdBindPipeline) \
    VK_FUNC_DECL(vkCmdBindDescriptorSets) \
    VK_FUNC_DECL(vkCmdPipelineBarrier) \
    VK_FUNC_DECL(vkCmdDispatch) \
    VK_FUNC_DECL(vkCmdFillBuffer) \
    VK_FUNC_DECL(vkCmdCopyBuffer) \
    VK_FUNC_DECL(vkCmdUpdateBuffer) \
    VK_FUNC_DECL(vkCmdCopyBufferToImage) \
    VK_FUNC_DECL(vkCmdCopyImageToBuffer) \
    VK_FUNC_DECL(vkEndCommandBuffer) \
    VK_FUNC_DECL(vkCreateBuffer) \
    VK_FUNC_DECL(vkCreateImageView) \
    VK_FUNC_DECL(vkAllocateMemory) \
    VK_FUNC_DECL(vkMapMemory) \
    VK_FUNC_DECL(vkBindBufferMemory) \
    VK_FUNC_DECL(vkBindImageMemory) \
    VK_FUNC_DECL(vkUnmapMemory) \
    VK_FUNC_DECL(vkFreeMemory) \
    VK_FUNC_DECL(vkCreateCommandPool) \
    VK_FUNC_DECL(vkResetCommandPool) \
    VK_FUNC_DECL(vkDestroyCommandPool) \
    VK_FUNC_DECL(vkResetCommandBuffer) \
    VK_FUNC_DECL(vkFreeCommandBuffers) \
    VK_FUNC_DECL(vkQueueSubmit) \
    VK_FUNC_DECL(vkCmdExecuteCommands) \
    VK_FUNC_DECL(vkCreateFence) \
    VK_FUNC_DECL(vkDestroyFence) \
    VK_FUNC_DECL(vkGetFenceStatus) \
    VK_FUNC_DECL(vkResetFences) \
    VK_FUNC_DECL(vkWaitForFences) \
    VK_FUNC_DECL(vkCreateSemaphore) \
    VK_FUNC_DECL(vkDestroySemaphore) \
    VK_FUNC_DECL(vkCreateEvent) \
    VK_FUNC_DECL(vkDestroyImageView) \
    VK_FUNC_DECL(vkCreateImage) \
    VK_FUNC_DECL(vkGetImageMemoryRequirements) \
    VK_FUNC_DECL(vkDestroyImage) \
    VK_FUNC_DECL(vkDestroyBuffer) \
    VK_FUNC_DECL(vkDestroyPipeline) \
    VK_FUNC_DECL(vkDestroyShaderModule) \
    VK_FUNC_DECL(vkGetPhysicalDeviceMemoryProperties) \
    VK_FUNC_DECL(vkDestroyInstance) \
    VK_FUNC_DECL(vkUpdateDescriptorSets) \
    VK_FUNC_DECL(vkDestroyDescriptorPool) \
    VK_FUNC_DECL(vkDestroyPipelineLayout) \
    VK_FUNC_DECL(vkDestroyDescriptorSetLayout) \
    VK_FUNC_DECL(vkGetPhysicalDeviceQueueFamilyProperties) \
    VK_FUNC_DECL(vkGetPhysicalDeviceFeatures) \
    VK_FUNC_DECL(vkGetPhysicalDeviceProperties2KHR) \
    VK_FUNC_DECL(vkGetBufferMemoryRequirements) \
    VK_FUNC_DECL(vkGetMemoryFdKHR) \
    VK_FUNC_DECL(vkGetSemaphoreFdKHR) \
    VK_FUNC_DECL(vkEnumeratePhysicalDeviceGroups) \
    VK_FUNC_DECL(vkGetPhysicalDeviceSurfaceCapabilitiesKHR) \
    VK_FUNC_DECL(vkGetPhysicalDeviceSurfaceFormatsKHR) \
    VK_FUNC_DECL(vkGetPhysicalDeviceSurfacePresentModesKHR) \
    VK_FUNC_DECL(vkEnumerateDeviceExtensionProperties) \
    VK_FUNC_DECL(vkGetPhysicalDeviceSurfaceSupportKHR)

// Entry points that only exist on Windows.
#define VK_WINDOWS_FUNC_LIST \
    VK_FUNC_DECL(vkGetMemoryWin32HandleKHR) \
    VK_FUNC_DECL(vkGetSemaphoreWin32HandleKHR)

// Redirect each vk* name to the dynamically resolved _vk* pointer so the
// wrapper never links against the loader's exported symbols directly.
#define vkEnumerateInstanceVersion _vkEnumerateInstanceVersion
#define vkEnumerateInstanceExtensionProperties \
    _vkEnumerateInstanceExtensionProperties
#define vkEnumerateInstanceLayerProperties _vkEnumerateInstanceLayerProperties
#define vkCreateInstance _vkCreateInstance
#define vkGetInstanceProcAddr _vkGetInstanceProcAddr
#define vkGetDeviceProcAddr _vkGetDeviceProcAddr
#define vkEnumeratePhysicalDevices _vkEnumeratePhysicalDevices
#define vkGetPhysicalDeviceProperties _vkGetPhysicalDeviceProperties
#define vkCreateDevice _vkCreateDevice
#define vkDestroyDevice _vkDestroyDevice
#define vkGetDeviceQueue _vkGetDeviceQueue
#define vkQueueWaitIdle _vkQueueWaitIdle
#define vkCreateDescriptorSetLayout _vkCreateDescriptorSetLayout
#define vkCreatePipelineLayout _vkCreatePipelineLayout
#define vkCreateShaderModule _vkCreateShaderModule
#define vkCreateComputePipelines _vkCreateComputePipelines
#define vkCreateDescriptorPool _vkCreateDescriptorPool
#define vkAllocateDescriptorSets _vkAllocateDescriptorSets
#define vkFreeDescriptorSets _vkFreeDescriptorSets
#define vkAllocateCommandBuffers _vkAllocateCommandBuffers
#define vkBeginCommandBuffer _vkBeginCommandBuffer
#define vkCmdBindPipeline _vkCmdBindPipeline
#define vkCmdBindDescriptorSets _vkCmdBindDescriptorSets
#define vkCmdPipelineBarrier _vkCmdPipelineBarrier
#define vkCmdDispatch _vkCmdDispatch
#define vkCmdFillBuffer _vkCmdFillBuffer
#define vkCmdCopyBuffer _vkCmdCopyBuffer
#define vkCmdUpdateBuffer _vkCmdUpdateBuffer
#define vkCmdCopyBufferToImage _vkCmdCopyBufferToImage
#define vkCmdCopyImageToBuffer _vkCmdCopyImageToBuffer
#define vkEndCommandBuffer _vkEndCommandBuffer
#define vkCreateBuffer _vkCreateBuffer
#define vkCreateImageView _vkCreateImageView
#define vkAllocateMemory _vkAllocateMemory
#define vkMapMemory _vkMapMemory
#define vkBindBufferMemory _vkBindBufferMemory
#define vkBindImageMemory _vkBindImageMemory
#define vkUnmapMemory _vkUnmapMemory
#define vkFreeMemory _vkFreeMemory
#define vkCreateCommandPool _vkCreateCommandPool
#define vkResetCommandPool _vkResetCommandPool
#define vkDestroyCommandPool _vkDestroyCommandPool
#define vkResetCommandBuffer _vkResetCommandBuffer
#define vkFreeCommandBuffers _vkFreeCommandBuffers
#define vkQueueSubmit _vkQueueSubmit
#define vkCmdExecuteCommands _vkCmdExecuteCommands
#define vkCreateFence _vkCreateFence
#define vkDestroyFence _vkDestroyFence
#define vkGetFenceStatus _vkGetFenceStatus
#define vkResetFences _vkResetFences
#define vkWaitForFences _vkWaitForFences
#define vkCreateSemaphore _vkCreateSemaphore
#define vkDestroySemaphore _vkDestroySemaphore
#define vkCreateEvent _vkCreateEvent
#define vkDestroyImageView _vkDestroyImageView
#define vkCreateImage _vkCreateImage
#define vkGetImageMemoryRequirements _vkGetImageMemoryRequirements
#define vkDestroyImage _vkDestroyImage
// BUG FIX: was "#define vkDestroyBuffe _vkDestroyBuffer" -- the misspelled
// macro name meant vkDestroyBuffer calls were never redirected to the
// resolved pointer.
#define vkDestroyBuffer _vkDestroyBuffer
#define vkDestroyPipeline _vkDestroyPipeline
#define vkDestroyShaderModule _vkDestroyShaderModule
#define vkGetPhysicalDeviceMemoryProperties _vkGetPhysicalDeviceMemoryProperties
#define vkDestroyInstance _vkDestroyInstance
#define vkUpdateDescriptorSets _vkUpdateDescriptorSets
#define vkDestroyDescriptorPool _vkDestroyDescriptorPool
#define vkDestroyPipelineLayout _vkDestroyPipelineLayout
#define vkDestroyDescriptorSetLayout _vkDestroyDescriptorSetLayout
#define vkGetPhysicalDeviceQueueFamilyProperties \
    _vkGetPhysicalDeviceQueueFamilyProperties
#define vkGetPhysicalDeviceFeatures _vkGetPhysicalDeviceFeatures
#define vkGetPhysicalDeviceProperties2KHR _vkGetPhysicalDeviceProperties2KHR
#define vkGetBufferMemoryRequirements _vkGetBufferMemoryRequirements
#define vkGetMemoryFdKHR _vkGetMemoryFdKHR
#define vkGetSemaphoreFdKHR _vkGetSemaphoreFdKHR
#define vkEnumeratePhysicalDeviceGroups _vkEnumeratePhysicalDeviceGroups
#define vkGetPhysicalDeviceSurfaceCapabilitiesKHR \
    _vkGetPhysicalDeviceSurfaceCapabilitiesKHR
#define vkGetPhysicalDeviceSurfaceFormatsKHR \
    _vkGetPhysicalDeviceSurfaceFormatsKHR
#define vkGetPhysicalDeviceSurfacePresentModesKHR \
    _vkGetPhysicalDeviceSurfacePresentModesKHR
#define vkEnumerateDeviceExtensionProperties \
    _vkEnumerateDeviceExtensionProperties
#define vkGetPhysicalDeviceSurfaceSupportKHR \
    _vkGetPhysicalDeviceSurfaceSupportKHR

#define vkGetMemoryWin32HandleKHR _vkGetMemoryWin32HandleKHR
#define vkGetSemaphoreWin32HandleKHR _vkGetSemaphoreWin32HandleKHR

#endif //_vulkan_api_list_hpp_
+// + +#include "vulkan_interop_common.hpp" + +uint32_t innerIterations(5); +uint32_t perfIterations(100); +uint32_t stressIterations(1000); +size_t cpuThreadsPerGpu(3); diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp new file mode 100644 index 00000000..18d84f09 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_interop_common.hpp @@ -0,0 +1,50 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef _vulkan_interop_common_hpp_ +#define _vulkan_interop_common_hpp_ + +#include "vulkan_wrapper_types.hpp" +#include "vulkan_wrapper.hpp" +#include "vulkan_list_map.hpp" +#include "vulkan_utility.hpp" +#include "opencl_vulkan_wrapper.hpp" + +// Number of iterations for loops within tests (default value 5) +extern unsigned int innerIterations; +// Number of iterations for loops within perf tests (default value 100) +extern unsigned int perfIterations; +// Number of iterations for loops within stress tests (default value 1000) +extern unsigned int stressIterations; +// Number of CPU threads per GPU (default value 3) +extern size_t cpuThreadsPerGpu; +// Number of command queues (default value 1) +extern unsigned int numCQ; +// Enable Multi-import of vulkan device memory +extern bool multiImport; +// Enable Multi-import of vulkan device memory under different context +extern bool multiCtx; +// Enable additional debug info logging +extern bool debug_trace; + +extern bool useSingleImageKernel; +extern bool useDeviceLocal; +extern bool disableNTHandleType; +// Enable offset for multiImport of vulkan device memory +extern bool enableOffset; +extern bool non_dedicated; + +#endif // _vulkan_interop_common_hpp_ diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp new file mode 100644 index 00000000..bdae5d22 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.cpp @@ -0,0 +1,424 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifdef _WIN32 +#define NOMINMAX +#endif +#include "vulkan_list_map.hpp" +#include "vulkan_utility.hpp" +#include "vulkan_wrapper.hpp" + +///////////////////////////////////////////// +// VulkanPhysicalDeviceList implementation // +///////////////////////////////////////////// + +VulkanPhysicalDeviceList::VulkanPhysicalDeviceList( + const VulkanPhysicalDeviceList &physicalDeviceList) +{} + +VulkanPhysicalDeviceList::VulkanPhysicalDeviceList() {} + +VulkanPhysicalDeviceList::~VulkanPhysicalDeviceList() {} + +///////////////////////////////////////// +// VulkanMemoryHeapList implementation // +///////////////////////////////////////// + +VulkanMemoryHeapList::VulkanMemoryHeapList( + const VulkanMemoryHeapList &memoryHeapList) +{} + +VulkanMemoryHeapList::VulkanMemoryHeapList() {} + +VulkanMemoryHeapList::~VulkanMemoryHeapList() {} + +///////////////////////////////////////// +// VulkanMemoryTypeList implementation // +///////////////////////////////////////// + +VulkanMemoryTypeList::VulkanMemoryTypeList( + const VulkanMemoryTypeList &memoryTypeList) +{} + +VulkanMemoryTypeList::VulkanMemoryTypeList() {} + +VulkanMemoryTypeList::~VulkanMemoryTypeList() {} + +////////////////////////////////////////// +// VulkanQueueFamilyList implementation // +////////////////////////////////////////// + +VulkanQueueFamilyList::VulkanQueueFamilyList( + const VulkanQueueFamilyList &queueFamilyList) +{} + +VulkanQueueFamilyList::VulkanQueueFamilyList() {} + +VulkanQueueFamilyList::~VulkanQueueFamilyList() {} + 
+///////////////////////////////////////////////////// +// VulkanQueueFamilyToQueueCountMap implementation // +///////////////////////////////////////////////////// + +VulkanQueueFamilyToQueueCountMap::VulkanQueueFamilyToQueueCountMap( + const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap) +{} + +VulkanQueueFamilyToQueueCountMap::VulkanQueueFamilyToQueueCountMap( + uint32_t numQueuesPerFamily) +{ + uint32_t maxQueueFamilyCount = 0; + const VulkanPhysicalDeviceList &physicalDeviceList = + getVulkanInstance().getPhysicalDeviceList(); + for (size_t pdIdx = 0; pdIdx < physicalDeviceList.size(); pdIdx++) + { + maxQueueFamilyCount = std::max( + maxQueueFamilyCount, + (uint32_t)physicalDeviceList[pdIdx].getQueueFamilyList().size()); + } + + for (uint32_t qfIdx = 0; qfIdx < maxQueueFamilyCount; qfIdx++) + { + insert(qfIdx, numQueuesPerFamily); + } +} + +VulkanQueueFamilyToQueueCountMap::~VulkanQueueFamilyToQueueCountMap() {} + +//////////////////////////////////////////////////// +// VulkanQueueFamilyToQueueListMap implementation // +//////////////////////////////////////////////////// + +VulkanQueueFamilyToQueueListMap::VulkanQueueFamilyToQueueListMap( + const VulkanQueueFamilyToQueueListMap &queueFamilyToQueueMap) +{} + +VulkanQueueFamilyToQueueListMap::VulkanQueueFamilyToQueueListMap() {} + +VulkanQueueFamilyToQueueListMap::~VulkanQueueFamilyToQueueListMap() {} + +void VulkanQueueFamilyToQueueListMap::insert(uint32_t key, + VulkanQueueList &queueList) +{ + m_map.insert(std::pair>( + key, std::reference_wrapper(queueList))); +} + +VulkanQueueList &VulkanQueueFamilyToQueueListMap::operator[](uint32_t key) +{ + return m_map.at(key).get(); +} + +//////////////////////////////////// +// VulkanQueueList implementation // +//////////////////////////////////// + +VulkanQueueList::VulkanQueueList(const VulkanQueueList &queueList) {} + +VulkanQueueList::VulkanQueueList() {} + +VulkanQueueList::~VulkanQueueList() {} + 
+///////////////////////////////////////////////////////// +// VulkanDescriptorSetLayoutBindingList implementation // +///////////////////////////////////////////////////////// + +VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList( + const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList) +{} + +VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList() {} + +VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList( + size_t numDescriptorSetLayoutBindings, VulkanDescriptorType descriptorType, + uint32_t descriptorCount, VulkanShaderStage shaderStage) +{ + for (size_t idx = 0; idx < numDescriptorSetLayoutBindings; idx++) + { + VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding = + new VulkanDescriptorSetLayoutBinding((uint32_t)idx, descriptorType, + descriptorCount, shaderStage); + add(*descriptorSetLayoutBinding); + } +} + +VulkanDescriptorSetLayoutBindingList::VulkanDescriptorSetLayoutBindingList( + VulkanDescriptorType descriptorType0, uint32_t descriptorCount0, + VulkanDescriptorType descriptorType1, uint32_t descriptorCount1, + VulkanShaderStage shaderStage) +{ + for (uint32_t idx = 0; idx < descriptorCount0; idx++) + { + VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding0 = + new VulkanDescriptorSetLayoutBinding(idx, descriptorType0, 1, + shaderStage); + add(*descriptorSetLayoutBinding0); + } + for (uint32_t idx = 0; idx < descriptorCount1; idx++) + { + VulkanDescriptorSetLayoutBinding *descriptorSetLayoutBinding1 = + new VulkanDescriptorSetLayoutBinding( + descriptorCount0 + idx, descriptorType1, 1, shaderStage); + add(*descriptorSetLayoutBinding1); + } +} + +VulkanDescriptorSetLayoutBindingList::~VulkanDescriptorSetLayoutBindingList() +{ + for (size_t idx = 0; idx < m_wrapperList.size(); idx++) + { + VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding = + m_wrapperList[idx]; + delete &descriptorSetLayoutBinding; + } +} + 
+////////////////////////////////////////////////// +// VulkanDescriptorSetLayoutList implementation // +////////////////////////////////////////////////// + +VulkanDescriptorSetLayoutList::VulkanDescriptorSetLayoutList( + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList) +{} + +VulkanDescriptorSetLayoutList::VulkanDescriptorSetLayoutList() {} + +VulkanDescriptorSetLayoutList::~VulkanDescriptorSetLayoutList() {} + +//////////////////////////////////////////// +// VulkanCommandBufferList implementation // +//////////////////////////////////////////// + +VulkanCommandBufferList::VulkanCommandBufferList( + const VulkanCommandBufferList &commandBufferList) +{} + +VulkanCommandBufferList::VulkanCommandBufferList() {} + +VulkanCommandBufferList::VulkanCommandBufferList( + size_t numCommandBuffers, const VulkanDevice &device, + const VulkanCommandPool &commandPool) +{ + for (size_t idx = 0; idx < numCommandBuffers; idx++) + { + VulkanCommandBuffer *commandBuffer = + new VulkanCommandBuffer(device, commandPool); + add(*commandBuffer); + } +} + +VulkanCommandBufferList::~VulkanCommandBufferList() +{ + for (size_t idx = 0; idx < m_wrapperList.size(); idx++) + { + VulkanCommandBuffer &commandBuffer = m_wrapperList[idx]; + delete &commandBuffer; + } +} + +///////////////////////////////////// +// VulkanBufferList implementation // +///////////////////////////////////// + +VulkanBufferList::VulkanBufferList(const VulkanBufferList &bufferList) {} + +VulkanBufferList::VulkanBufferList( + size_t numBuffers, const VulkanDevice &device, uint64_t size, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanBufferUsage bufferUsage, VulkanSharingMode sharingMode, + const VulkanQueueFamilyList &queueFamilyList) +{ + for (size_t bIdx = 0; bIdx < numBuffers; bIdx++) + { + VulkanBuffer *buffer = + new VulkanBuffer(device, size, externalMemoryHandleType, + bufferUsage, sharingMode, queueFamilyList); + add(*buffer); + } +} + +VulkanBufferList::~VulkanBufferList() +{ + 
for (size_t bIdx = 0; bIdx < m_wrapperList.size(); bIdx++) + { + VulkanBuffer &buffer = m_wrapperList[bIdx]; + delete &buffer; + } +} + +////////////////////////////////////// +// VulkanImage2DList implementation // +////////////////////////////////////// + +VulkanImage2DList::VulkanImage2DList(const VulkanImage2DList &image2DList) {} + +VulkanImage2DList::VulkanImage2DList( + size_t numImages, std::vector &deviceMemory, + uint64_t baseOffset, uint64_t interImageOffset, const VulkanDevice &device, + VulkanFormat format, uint32_t width, uint32_t height, uint32_t mipLevels, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage, + VulkanSharingMode sharingMode) +{ + for (size_t i2DIdx = 0; i2DIdx < numImages; i2DIdx++) + { + VulkanImage2D *image2D = new VulkanImage2D( + device, format, width, height, mipLevels, externalMemoryHandleType, + imageCreateFlag, imageUsage, sharingMode); + add(*image2D); + deviceMemory[i2DIdx]->bindImage( + *image2D, baseOffset + (i2DIdx * interImageOffset)); + } +} + +VulkanImage2DList::VulkanImage2DList( + size_t numImages, const VulkanDevice &device, VulkanFormat format, + uint32_t width, uint32_t height, uint32_t mipLevels, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage, + VulkanSharingMode sharingMode) +{ + for (size_t bIdx = 0; bIdx < numImages; bIdx++) + { + VulkanImage2D *image2D = new VulkanImage2D( + device, format, width, height, mipLevels, externalMemoryHandleType, + imageCreateFlag, imageUsage, sharingMode); + add(*image2D); + } +} + +VulkanImage2DList::~VulkanImage2DList() +{ + for (size_t i2DIdx = 0; i2DIdx < m_wrapperList.size(); i2DIdx++) + { + VulkanImage2D &image2D = m_wrapperList[i2DIdx]; + delete &image2D; + } +} + +//////////////////////////////////////// +// VulkanImageViewList implementation // +//////////////////////////////////////// + 
+VulkanImageViewList::VulkanImageViewList(const VulkanImageViewList &image2DList) +{} + +VulkanImageViewList::VulkanImageViewList(const VulkanDevice &device, + const VulkanImage2DList &image2DList, + bool createImageViewPerMipLevel) +{ + for (size_t i2DIdx = 0; i2DIdx < image2DList.size(); i2DIdx++) + { + if (createImageViewPerMipLevel) + { + for (uint32_t mipLevel = 0; + mipLevel < image2DList[i2DIdx].getNumMipLevels(); mipLevel++) + { + VulkanImageView *image2DView = + new VulkanImageView(device, image2DList[i2DIdx], + VULKAN_IMAGE_VIEW_TYPE_2D, mipLevel, 1); + add(*image2DView); + } + } + else + { + VulkanImageView *image2DView = new VulkanImageView( + device, image2DList[i2DIdx], VULKAN_IMAGE_VIEW_TYPE_2D); + add(*image2DView); + } + } +} + +VulkanImageViewList::~VulkanImageViewList() +{ + for (size_t ivIdx = 0; ivIdx < m_wrapperList.size(); ivIdx++) + { + VulkanImageView &imageView = m_wrapperList[ivIdx]; + delete &imageView; + } +} + +/////////////////////////////////////////// +// VulkanDeviceMemoryList implementation // +/////////////////////////////////////////// + +VulkanDeviceMemoryList::VulkanDeviceMemoryList( + const VulkanDeviceMemoryList &deviceMemoryList) +{} + +VulkanDeviceMemoryList::VulkanDeviceMemoryList( + size_t numImages, const VulkanImage2DList &image2DList, + const VulkanDevice &device, const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType) +{ + for (size_t i2DIdx = 0; i2DIdx < image2DList.size(); i2DIdx++) + { + VulkanDeviceMemory *deviceMemory = new VulkanDeviceMemory( + device, image2DList[i2DIdx], memoryType, externalMemoryHandleType); + add(*deviceMemory); + deviceMemory->bindImage(image2DList[i2DIdx]); + } +} + +VulkanDeviceMemoryList::~VulkanDeviceMemoryList() +{ + for (size_t dmIdx = 0; dmIdx < m_wrapperList.size(); dmIdx++) + { + VulkanDeviceMemory &deviceMemory = m_wrapperList[dmIdx]; + delete &deviceMemory; + } +} + +//////////////////////////////////////// +// VulkanSemaphoreList 
implementation // +//////////////////////////////////////// + +VulkanSemaphoreList::VulkanSemaphoreList( + const VulkanSemaphoreList &semaphoreList) +{} + +VulkanSemaphoreList::VulkanSemaphoreList() {} + +VulkanSemaphoreList::VulkanSemaphoreList( + size_t numSemaphores, const VulkanDevice &device, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType, + const std::wstring namePrefix) +{ + std::wstring name = L""; + for (size_t idx = 0; idx < numSemaphores; idx++) + { + if (namePrefix.size()) + { + const size_t maxNameSize = 256; + wchar_t tempName[maxNameSize]; + swprintf(tempName, maxNameSize, L"%s%d", namePrefix.c_str(), + (int)idx); + name = tempName; + } + VulkanSemaphore *semaphore = + new VulkanSemaphore(device, externalSemaphoreHandleType, name); + add(*semaphore); + } +} + +VulkanSemaphoreList::~VulkanSemaphoreList() +{ + for (size_t idx = 0; idx < m_wrapperList.size(); idx++) + { + VulkanSemaphore &Semaphore = m_wrapperList[idx]; + delete &Semaphore; + } +} diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp new file mode 100644 index 00000000..831403e1 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp @@ -0,0 +1,389 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef _vulkan_list_map_hpp_ +#define _vulkan_list_map_hpp_ + +#include +#include "vulkan_wrapper_types.hpp" +#include "vulkan_utility.hpp" +#include +template class VulkanList { +protected: + std::vector> m_wrapperList; + std::vector> m_constWrapperList; + std::vector m_nativeList; + + VulkanList(const VulkanList &list); + VulkanList(); + virtual ~VulkanList(); + virtual void add(VulkanWrapper &wrapper); + +public: + virtual void add(const VulkanWrapper &wrapper); + virtual size_t size() const; + virtual const VulkanWrapper &operator[](size_t idx) const; + virtual VulkanWrapper &operator[](size_t idx); + virtual operator const VulkanNative *() const; +}; + +template class VulkanMap { +protected: + std::map m_map; + + VulkanMap(const VulkanMap &map); + VulkanMap(); + virtual ~VulkanMap(); + +public: + void insert(const VulkanKey &key, VulkanValue &value); + const VulkanValue &operator[](const VulkanKey &key) const; + VulkanValue &operator[](const VulkanKey &key); +}; + +class VulkanPhysicalDeviceList + : public VulkanList { + friend class VulkanInstance; + +protected: + VulkanPhysicalDeviceList( + const VulkanPhysicalDeviceList &physicalDeviceList); + +public: + VulkanPhysicalDeviceList(); + virtual ~VulkanPhysicalDeviceList(); +}; + +class VulkanQueueFamilyList : public VulkanList { + friend class VulkanPhysicalDevice; + +protected: + VulkanQueueFamilyList(const VulkanQueueFamilyList &queueFamilyList); + +public: + VulkanQueueFamilyList(); + virtual ~VulkanQueueFamilyList(); +}; + +class VulkanMemoryHeapList : public VulkanList { + friend class VulkanPhysicalDevice; + +protected: + VulkanMemoryHeapList(const VulkanMemoryHeapList &memoryHeapList); + +public: + VulkanMemoryHeapList(); + virtual ~VulkanMemoryHeapList(); +}; + +class VulkanMemoryTypeList : public VulkanList { + friend class VulkanPhysicalDevice; + friend class VulkanBuffer; + friend class VulkanImage; + +protected: + VulkanMemoryTypeList(const VulkanMemoryTypeList &memoryTypeList); + +public: 
+ VulkanMemoryTypeList(); + virtual ~VulkanMemoryTypeList(); +}; + +class VulkanQueueFamilyToQueueCountMap : public VulkanMap { +protected: + VulkanQueueFamilyToQueueCountMap( + const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap); + +public: + VulkanQueueFamilyToQueueCountMap(uint32_t numQueuesPerFamily = 0); + virtual ~VulkanQueueFamilyToQueueCountMap(); +}; + +class VulkanQueueList : public VulkanList { + friend class VulkanDevice; + +protected: + VulkanQueueList(const VulkanQueueList &queueList); + +public: + VulkanQueueList(); + virtual ~VulkanQueueList(); +}; + +class VulkanQueueFamilyToQueueListMap + : public VulkanMap> { +protected: + VulkanQueueFamilyToQueueListMap( + const VulkanQueueFamilyToQueueListMap &queueFamilyToQueueMap); + +public: + VulkanQueueFamilyToQueueListMap(); + virtual ~VulkanQueueFamilyToQueueListMap(); + void insert(uint32_t key, VulkanQueueList &queueList); + VulkanQueueList &operator[](uint32_t key); +}; + +class VulkanDescriptorSetLayoutBindingList + : public VulkanList { +protected: + VulkanDescriptorSetLayoutBindingList( + const VulkanDescriptorSetLayoutBindingList + &descriptorSetLayoutBindingList); + +public: + VulkanDescriptorSetLayoutBindingList(); + VulkanDescriptorSetLayoutBindingList( + size_t numDescriptorSetLayoutBindings, + VulkanDescriptorType descriptorType, uint32_t descriptorCount = 1, + VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE); + VulkanDescriptorSetLayoutBindingList( + VulkanDescriptorType descriptorType0, uint32_t descriptorCount0, + VulkanDescriptorType descriptorType1, uint32_t descriptorCount1, + VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE); + virtual ~VulkanDescriptorSetLayoutBindingList(); +}; + +class VulkanDescriptorSetLayoutList + : public VulkanList { +protected: + VulkanDescriptorSetLayoutList( + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList); + +public: + VulkanDescriptorSetLayoutList(); + virtual ~VulkanDescriptorSetLayoutList(); +}; + 
+class VulkanCommandBufferList + : public VulkanList { +protected: + VulkanCommandBufferList(const VulkanCommandBufferList &commandBufferList); + +public: + VulkanCommandBufferList(); + VulkanCommandBufferList(size_t numCommandBuffers, + const VulkanDevice &device, + const VulkanCommandPool &commandPool); + virtual ~VulkanCommandBufferList(); +}; + +class VulkanBufferList : public VulkanList { +protected: + VulkanBufferList(const VulkanBufferList &bufferList); + +public: + VulkanBufferList( + size_t numBuffers, const VulkanDevice &device, uint64_t size, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanBufferUsage bufferUsage = + VULKAN_BUFFER_USAGE_STORAGE_BUFFER_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE, + const VulkanQueueFamilyList &queueFamilyList = + getEmptyVulkanQueueFamilyList()); + virtual ~VulkanBufferList(); +}; + +class VulkanImage2DList : public VulkanList { +protected: + VulkanImage2DList(const VulkanImage2DList &image2DList); + +public: + VulkanImage2DList( + size_t numImages, std::vector &deviceMemory, + uint64_t baseOffset, uint64_t interImageOffset, + const VulkanDevice &device, VulkanFormat format, uint32_t width, + uint32_t height, uint32_t mipLevels, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, + VulkanImageUsage imageUsage = + VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); + VulkanImage2DList( + size_t numImages, const VulkanDevice &device, VulkanFormat format, + uint32_t width, uint32_t height, uint32_t mipLevels = 1, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, + VulkanImageUsage imageUsage = + 
VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); + virtual ~VulkanImage2DList(); +}; + +class VulkanImageViewList : public VulkanList { +protected: + VulkanImageViewList(const VulkanImageViewList &imageViewList); + +public: + VulkanImageViewList(const VulkanDevice &device, + const VulkanImage2DList &image2DList, + bool createImageViewPerMipLevel = true); + virtual ~VulkanImageViewList(); +}; + +class VulkanDeviceMemoryList + : public VulkanList { +protected: + VulkanDeviceMemoryList(const VulkanDeviceMemoryList &deviceMemoryList); + +public: + VulkanDeviceMemoryList( + size_t numImages, const VulkanImage2DList &image2DList, + const VulkanDevice &device, const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE); + virtual ~VulkanDeviceMemoryList(); +}; + +class VulkanSemaphoreList : public VulkanList { +protected: + VulkanSemaphoreList(const VulkanSemaphoreList &semaphoreList); + +public: + VulkanSemaphoreList(); + VulkanSemaphoreList( + size_t numSemaphores, const VulkanDevice &device, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType = + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE, + const std::wstring namePrefix = L""); + virtual ~VulkanSemaphoreList(); +}; + +/////////////////////////////// +// VulkanList implementation // +/////////////////////////////// + +template +VulkanList::VulkanList(const VulkanList &list) + : m_wrapperList(list.m_wrapperList), + m_constWrapperList(list.m_constWrapperList), + m_nativeList(list.m_nativeList) +{} + +template +VulkanList::VulkanList() +{} + +template +VulkanList::~VulkanList() +{} + +template +void VulkanList::add(VulkanWrapper &wrapper) +{ + + if (m_constWrapperList.size() != size_t(0)) + { + std::cout << "This list can only contain externally allocated objects" + << std::endl; + return; + } + m_wrapperList.push_back(std::reference_wrapper(wrapper)); + 
m_nativeList.push_back((VulkanNative)wrapper); +} + +template +void VulkanList::add(const VulkanWrapper &wrapper) +{ + if (m_wrapperList.size() != size_t(0)) + { + std::cout << "This list cannot contain externally allocated objects" + << std::endl; + return; + } + + m_constWrapperList.push_back( + std::reference_wrapper(wrapper)); + m_nativeList.push_back((VulkanNative)wrapper); +} + +template +size_t VulkanList::size() const +{ + return (m_wrapperList.size() > 0) ? m_wrapperList.size() + : m_constWrapperList.size(); +} + +template +const VulkanWrapper & + VulkanList::operator[](size_t idx) const +{ + if (idx < size()) + { + // CHECK_LT(idx, size()); + return (m_wrapperList.size() > 0) ? m_wrapperList[idx].get() + : m_constWrapperList[idx].get(); + } +} + +template +VulkanWrapper &VulkanList::operator[](size_t idx) +{ + if (idx < m_wrapperList.size()) + { + // CHECK_LT(idx, m_wrapperList.size()); + return m_wrapperList[idx].get(); + } +} + +template +VulkanList::operator const VulkanNative *() const +{ + return m_nativeList.data(); +} + +////////////////////////////// +// VulkanMap implementation // +////////////////////////////// + +template +VulkanMap::VulkanMap(const VulkanMap &map) + : m_map(map.m_map) +{} + +template +VulkanMap::VulkanMap() +{} + +template +VulkanMap::~VulkanMap() +{} + +template +void VulkanMap::insert(const VulkanKey &key, + VulkanValue &value) +{ + m_map.insert(std::pair>( + key, std::reference_wrapper(value))); +} + +template +const VulkanValue & + VulkanMap::operator[](const VulkanKey &key) const +{ + return m_map.at(key); +} + +template +VulkanValue &VulkanMap::operator[](const VulkanKey &key) +{ + return m_map.at(key); +} + +#endif // _vulkan_list_map_hpp_ diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp new file mode 100644 index 00000000..81e12621 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp @@ -0,0 
+1,693 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "vulkan_utility.hpp" +#include "vulkan_wrapper.hpp" +#include +#include +#include +#include +#include +#include +#if defined(_WIN32) || defined(_WIN64) +#include +#endif +#define ASSERT(x) assert((x)) +#define BUFFERSIZE 3000 + + +const VulkanInstance &getVulkanInstance() +{ + static VulkanInstance instance; + return instance; +} + +const VulkanPhysicalDevice &getVulkanPhysicalDevice() +{ + size_t pdIdx; + cl_int errNum = 0; + cl_platform_id platform = NULL; + cl_uchar uuid[CL_UUID_SIZE_KHR]; + cl_device_id *devices; + char *extensions = NULL; + size_t extensionSize = 0; + cl_uint num_devices = 0; + cl_uint device_no = 0; + const size_t bufsize = BUFFERSIZE; + char buf[BUFFERSIZE]; + const VulkanInstance &instance = getVulkanInstance(); + const VulkanPhysicalDeviceList &physicalDeviceList = + instance.getPhysicalDeviceList(); + + // get the platform ID + errNum = clGetPlatformIDs(1, &platform, NULL); + if (errNum != CL_SUCCESS) + { + printf("Error: Failed to get platform\n"); + throw std::runtime_error("Error: Failed to get number of platform\n"); + } + + errNum = + clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices); + if (CL_SUCCESS != errNum) + { + throw std::runtime_error( + "Error: clGetDeviceIDs failed in returning of devices\n"); + } + devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id)); + if 
(NULL == devices) + { + throw std::runtime_error( + "Error: Unable to allocate memory for devices\n"); + } + errNum = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, + NULL); + if (CL_SUCCESS != errNum) + { + throw std::runtime_error("Error: Failed to get deviceID.\n"); + } + bool is_selected = false; + for (device_no = 0; device_no < num_devices; device_no++) + { + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, 0, + NULL, &extensionSize); + if (CL_SUCCESS != errNum) + { + throw std::runtime_error("Error in clGetDeviceInfo for getting " + "device_extension size....\n"); + } + extensions = (char *)malloc(extensionSize); + if (NULL == extensions) + { + throw std::runtime_error( + "Unable to allocate memory for extensions\n"); + } + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_EXTENSIONS, + extensionSize, extensions, NULL); + if (CL_SUCCESS != errNum) + { + throw std::runtime_error("Error: Error in clGetDeviceInfo for " + "getting device_extension\n"); + } + errNum = clGetDeviceInfo(devices[device_no], CL_DEVICE_UUID_KHR, + CL_UUID_SIZE_KHR, uuid, &extensionSize); + if (CL_SUCCESS != errNum) + { + throw std::runtime_error( + "Error: clGetDeviceInfo failed with error\n"); + } + free(extensions); + for (pdIdx = 0; pdIdx < physicalDeviceList.size(); pdIdx++) + { + if (!memcmp(&uuid, physicalDeviceList[pdIdx].getUUID(), + VK_UUID_SIZE)) + { + std::cout << "Selected physical device = " + << physicalDeviceList[pdIdx] << std::endl; + is_selected = true; + break; + } + } + if (is_selected) + { + break; + } + } + + if ((pdIdx >= physicalDeviceList.size()) + || (physicalDeviceList[pdIdx] == (VkPhysicalDevice)VK_NULL_HANDLE)) + { + throw std::runtime_error("failed to find a suitable GPU!"); + } + std::cout << "Selected physical device is: " << physicalDeviceList[pdIdx] + << std::endl; + return physicalDeviceList[pdIdx]; +} + +const VulkanQueueFamily &getVulkanQueueFamily(uint32_t queueFlags) +{ + size_t qfIdx; + const 
VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice(); + const VulkanQueueFamilyList &queueFamilyList = + physicalDevice.getQueueFamilyList(); + + for (qfIdx = 0; qfIdx < queueFamilyList.size(); qfIdx++) + { + if ((queueFamilyList[qfIdx].getQueueFlags() & queueFlags) == queueFlags) + { + break; + } + } + + return queueFamilyList[qfIdx]; +} + +const VulkanMemoryType & +getVulkanMemoryType(const VulkanDevice &device, + VulkanMemoryTypeProperty memoryTypeProperty) +{ + size_t mtIdx; + const VulkanMemoryTypeList &memoryTypeList = + device.getPhysicalDevice().getMemoryTypeList(); + + for (mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + if ((memoryTypeList[mtIdx].getMemoryTypeProperty() & memoryTypeProperty) + == memoryTypeProperty) + { + break; + } + } + + // CHECK_LT(mtIdx, memoryTypeList.size()); + return memoryTypeList[mtIdx]; +} + +bool checkVkSupport() +{ + bool result = true; + const VulkanInstance &instance = getVulkanInstance(); + const VulkanPhysicalDeviceList &physicalDeviceList = + instance.getPhysicalDeviceList(); + if (physicalDeviceList == NULL) + { + std::cout << "physicalDeviceList is null, No GPUs found with " + "Vulkan support !!!\n"; + result = false; + } + return result; +} + +const VulkanQueueFamilyList &getEmptyVulkanQueueFamilyList() +{ + static VulkanQueueFamilyList queueFamilyList; + return queueFamilyList; +} + +const VulkanDescriptorSetLayoutList &getEmptyVulkanDescriptorSetLayoutList() +{ + static VulkanDescriptorSetLayoutList descriptorSetLayoutList; + + return descriptorSetLayoutList; +} + +const VulkanQueueFamilyToQueueCountMap & +getDefaultVulkanQueueFamilyToQueueCountMap() +{ + static VulkanQueueFamilyToQueueCountMap queueFamilyToQueueCountMap(1); + + return queueFamilyToQueueCountMap; +} + +const std::vector +getSupportedVulkanExternalMemoryHandleTypeList() +{ + std::vector externalMemoryHandleTypeList; + +#if _WIN32 + if (IsWindows8OrGreater()) + { + externalMemoryHandleTypeList.push_back( + 
VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT); + } + externalMemoryHandleTypeList.push_back( + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT); +#else + externalMemoryHandleTypeList.push_back( + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD); +#endif + + return externalMemoryHandleTypeList; +} + +const std::vector +getSupportedVulkanExternalSemaphoreHandleTypeList() +{ + std::vector + externalSemaphoreHandleTypeList; + +#if _WIN32 + if (IsWindows8OrGreater()) + { + externalSemaphoreHandleTypeList.push_back( + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT); + } + externalSemaphoreHandleTypeList.push_back( + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT); +#else + externalSemaphoreHandleTypeList.push_back( + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD); +#endif + + return externalSemaphoreHandleTypeList; +} + +const std::vector getSupportedVulkanFormatList() +{ + std::vector formatList; + + formatList.push_back(VULKAN_FORMAT_R8_UINT); + formatList.push_back(VULKAN_FORMAT_R8_SINT); + formatList.push_back(VULKAN_FORMAT_R8G8_UINT); + formatList.push_back(VULKAN_FORMAT_R8G8_SINT); + formatList.push_back(VULKAN_FORMAT_R8G8B8A8_UINT); + formatList.push_back(VULKAN_FORMAT_R8G8B8A8_SINT); + formatList.push_back(VULKAN_FORMAT_R16_UINT); + formatList.push_back(VULKAN_FORMAT_R16_SINT); + formatList.push_back(VULKAN_FORMAT_R16G16_UINT); + formatList.push_back(VULKAN_FORMAT_R16G16_SINT); + formatList.push_back(VULKAN_FORMAT_R16G16B16A16_UINT); + formatList.push_back(VULKAN_FORMAT_R16G16B16A16_SINT); + formatList.push_back(VULKAN_FORMAT_R32_UINT); + formatList.push_back(VULKAN_FORMAT_R32_SINT); + formatList.push_back(VULKAN_FORMAT_R32_SFLOAT); + formatList.push_back(VULKAN_FORMAT_R32G32_UINT); + formatList.push_back(VULKAN_FORMAT_R32G32_SINT); + formatList.push_back(VULKAN_FORMAT_R32G32_SFLOAT); + formatList.push_back(VULKAN_FORMAT_R32G32B32A32_UINT); + formatList.push_back(VULKAN_FORMAT_R32G32B32A32_SINT); + 
formatList.push_back(VULKAN_FORMAT_R32G32B32A32_SFLOAT); + + for (size_t fIdx = 0; fIdx < formatList.size(); fIdx++) + { + switch (formatList[fIdx]) + { + case VULKAN_FORMAT_R8_UINT: + case VULKAN_FORMAT_R8_SINT: + case VULKAN_FORMAT_R8G8_UINT: + case VULKAN_FORMAT_R8G8_SINT: + case VULKAN_FORMAT_R8G8B8A8_UINT: + case VULKAN_FORMAT_R8G8B8A8_SINT: + case VULKAN_FORMAT_R16_UINT: + case VULKAN_FORMAT_R16_SINT: + case VULKAN_FORMAT_R16G16_UINT: + case VULKAN_FORMAT_R16G16_SINT: + case VULKAN_FORMAT_R16G16B16A16_UINT: + case VULKAN_FORMAT_R16G16B16A16_SINT: + case VULKAN_FORMAT_R32_UINT: + case VULKAN_FORMAT_R32_SINT: + case VULKAN_FORMAT_R32_SFLOAT: + case VULKAN_FORMAT_R32G32_UINT: + case VULKAN_FORMAT_R32G32_SINT: + case VULKAN_FORMAT_R32G32_SFLOAT: + case VULKAN_FORMAT_R32G32B32A32_UINT: + case VULKAN_FORMAT_R32G32B32A32_SINT: + case VULKAN_FORMAT_R32G32B32A32_SFLOAT: break; + + case VULKAN_FORMAT_UNDEFINED: + case VULKAN_FORMAT_R4G4_UNORM_PACK8: + case VULKAN_FORMAT_R4G4B4A4_UNORM_PACK16: + case VULKAN_FORMAT_B4G4R4A4_UNORM_PACK16: + case VULKAN_FORMAT_R5G6B5_UNORM_PACK16: + case VULKAN_FORMAT_B5G6R5_UNORM_PACK16: + case VULKAN_FORMAT_R5G5B5A1_UNORM_PACK16: + case VULKAN_FORMAT_B5G5R5A1_UNORM_PACK16: + case VULKAN_FORMAT_A1R5G5B5_UNORM_PACK16: + case VULKAN_FORMAT_R8_UNORM: + case VULKAN_FORMAT_R8_SNORM: + case VULKAN_FORMAT_R8_USCALED: + case VULKAN_FORMAT_R8_SSCALED: + case VULKAN_FORMAT_R8_SRGB: + case VULKAN_FORMAT_R8G8_SNORM: + case VULKAN_FORMAT_R8G8_UNORM: + case VULKAN_FORMAT_R8G8_USCALED: + case VULKAN_FORMAT_R8G8_SSCALED: + case VULKAN_FORMAT_R8G8_SRGB: + case VULKAN_FORMAT_R8G8B8_UNORM: + case VULKAN_FORMAT_R8G8B8_SNORM: + case VULKAN_FORMAT_R8G8B8_USCALED: + case VULKAN_FORMAT_R8G8B8_SSCALED: + case VULKAN_FORMAT_R8G8B8_UINT: + case VULKAN_FORMAT_R8G8B8_SINT: + case VULKAN_FORMAT_R8G8B8_SRGB: + case VULKAN_FORMAT_B8G8R8_UNORM: + case VULKAN_FORMAT_B8G8R8_SNORM: + case VULKAN_FORMAT_B8G8R8_USCALED: + case VULKAN_FORMAT_B8G8R8_SSCALED: + case 
VULKAN_FORMAT_B8G8R8_UINT: + case VULKAN_FORMAT_B8G8R8_SINT: + case VULKAN_FORMAT_B8G8R8_SRGB: + case VULKAN_FORMAT_R8G8B8A8_UNORM: + case VULKAN_FORMAT_R8G8B8A8_SNORM: + case VULKAN_FORMAT_R8G8B8A8_USCALED: + case VULKAN_FORMAT_R8G8B8A8_SSCALED: + case VULKAN_FORMAT_R8G8B8A8_SRGB: + case VULKAN_FORMAT_B8G8R8A8_UNORM: + case VULKAN_FORMAT_B8G8R8A8_SNORM: + case VULKAN_FORMAT_B8G8R8A8_USCALED: + case VULKAN_FORMAT_B8G8R8A8_SSCALED: + case VULKAN_FORMAT_B8G8R8A8_UINT: + case VULKAN_FORMAT_B8G8R8A8_SINT: + case VULKAN_FORMAT_B8G8R8A8_SRGB: + case VULKAN_FORMAT_A8B8G8R8_UNORM_PACK32: + case VULKAN_FORMAT_A8B8G8R8_SNORM_PACK32: + case VULKAN_FORMAT_A8B8G8R8_USCALED_PACK32: + case VULKAN_FORMAT_A8B8G8R8_SSCALED_PACK32: + case VULKAN_FORMAT_A8B8G8R8_UINT_PACK32: + case VULKAN_FORMAT_A8B8G8R8_SINT_PACK32: + case VULKAN_FORMAT_A8B8G8R8_SRGB_PACK32: + case VULKAN_FORMAT_A2R10G10B10_UNORM_PACK32: + case VULKAN_FORMAT_A2R10G10B10_SNORM_PACK32: + case VULKAN_FORMAT_A2R10G10B10_USCALED_PACK32: + case VULKAN_FORMAT_A2R10G10B10_SSCALED_PACK32: + case VULKAN_FORMAT_A2R10G10B10_UINT_PACK32: + case VULKAN_FORMAT_A2R10G10B10_SINT_PACK32: + case VULKAN_FORMAT_A2B10G10R10_UNORM_PACK32: + case VULKAN_FORMAT_A2B10G10R10_SNORM_PACK32: + case VULKAN_FORMAT_A2B10G10R10_USCALED_PACK32: + case VULKAN_FORMAT_A2B10G10R10_SSCALED_PACK32: + case VULKAN_FORMAT_A2B10G10R10_UINT_PACK32: + case VULKAN_FORMAT_A2B10G10R10_SINT_PACK32: + case VULKAN_FORMAT_R16_UNORM: + case VULKAN_FORMAT_R16_SNORM: + case VULKAN_FORMAT_R16_USCALED: + case VULKAN_FORMAT_R16_SSCALED: + case VULKAN_FORMAT_R16_SFLOAT: + case VULKAN_FORMAT_R16G16_UNORM: + case VULKAN_FORMAT_R16G16_SNORM: + case VULKAN_FORMAT_R16G16_USCALED: + case VULKAN_FORMAT_R16G16_SSCALED: + case VULKAN_FORMAT_R16G16_SFLOAT: + case VULKAN_FORMAT_R16G16B16_UNORM: + case VULKAN_FORMAT_R16G16B16_SNORM: + case VULKAN_FORMAT_R16G16B16_USCALED: + case VULKAN_FORMAT_R16G16B16_SSCALED: + case VULKAN_FORMAT_R16G16B16_UINT: + case VULKAN_FORMAT_R16G16B16_SINT: + 
case VULKAN_FORMAT_R16G16B16_SFLOAT: + case VULKAN_FORMAT_R16G16B16A16_UNORM: + case VULKAN_FORMAT_R16G16B16A16_SNORM: + case VULKAN_FORMAT_R16G16B16A16_USCALED: + case VULKAN_FORMAT_R16G16B16A16_SSCALED: + case VULKAN_FORMAT_R16G16B16A16_SFLOAT: + case VULKAN_FORMAT_R32G32B32_UINT: + case VULKAN_FORMAT_R32G32B32_SINT: + case VULKAN_FORMAT_R32G32B32_SFLOAT: + case VULKAN_FORMAT_R64_UINT: + case VULKAN_FORMAT_R64_SINT: + case VULKAN_FORMAT_R64_SFLOAT: + case VULKAN_FORMAT_R64G64_UINT: + case VULKAN_FORMAT_R64G64_SINT: + case VULKAN_FORMAT_R64G64_SFLOAT: + case VULKAN_FORMAT_R64G64B64_UINT: + case VULKAN_FORMAT_R64G64B64_SINT: + case VULKAN_FORMAT_R64G64B64_SFLOAT: + case VULKAN_FORMAT_R64G64B64A64_UINT: + case VULKAN_FORMAT_R64G64B64A64_SINT: + case VULKAN_FORMAT_R64G64B64A64_SFLOAT: + case VULKAN_FORMAT_B10G11R11_UFLOAT_PACK32: + case VULKAN_FORMAT_E5B9G9R9_UFLOAT_PACK32: + case VULKAN_FORMAT_D16_UNORM: + case VULKAN_FORMAT_X8_D24_UNORM_PACK32: + case VULKAN_FORMAT_D32_SFLOAT: + case VULKAN_FORMAT_S8_UINT: + case VULKAN_FORMAT_D16_UNORM_S8_UINT: + case VULKAN_FORMAT_D24_UNORM_S8_UINT: + case VULKAN_FORMAT_D32_SFLOAT_S8_UINT: + case VULKAN_FORMAT_BC1_RGB_UNORM_BLOCK: + case VULKAN_FORMAT_BC1_RGB_SRGB_BLOCK: + case VULKAN_FORMAT_BC1_RGBA_UNORM_BLOCK: + case VULKAN_FORMAT_BC1_RGBA_SRGB_BLOCK: + case VULKAN_FORMAT_BC2_UNORM_BLOCK: + case VULKAN_FORMAT_BC2_SRGB_BLOCK: + case VULKAN_FORMAT_BC3_UNORM_BLOCK: + case VULKAN_FORMAT_BC3_SRGB_BLOCK: + case VULKAN_FORMAT_BC4_UNORM_BLOCK: + case VULKAN_FORMAT_BC4_SNORM_BLOCK: + case VULKAN_FORMAT_BC5_UNORM_BLOCK: + case VULKAN_FORMAT_BC5_SNORM_BLOCK: + case VULKAN_FORMAT_BC6H_UFLOAT_BLOCK: + case VULKAN_FORMAT_BC6H_SFLOAT_BLOCK: + case VULKAN_FORMAT_BC7_UNORM_BLOCK: + case VULKAN_FORMAT_BC7_SRGB_BLOCK: + case VULKAN_FORMAT_ETC2_R8G8B8_UNORM_BLOCK: + case VULKAN_FORMAT_ETC2_R8G8B8_SRGB_BLOCK: + case VULKAN_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK: + case VULKAN_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK: + case 
VULKAN_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK: + case VULKAN_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK: + case VULKAN_FORMAT_EAC_R11_UNORM_BLOCK: + case VULKAN_FORMAT_EAC_R11_SNORM_BLOCK: + case VULKAN_FORMAT_EAC_R11G11_UNORM_BLOCK: + case VULKAN_FORMAT_EAC_R11G11_SNORM_BLOCK: + case VULKAN_FORMAT_ASTC_4x4_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_4x4_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_5x4_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_5x4_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_5x5_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_5x5_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_6x5_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_6x5_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_6x6_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_6x6_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_8x5_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_8x5_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_8x6_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_8x6_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_8x8_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_8x8_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_10x5_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_10x5_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_10x6_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_10x6_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_10x8_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_10x8_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_10x10_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_10x10_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_12x10_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_12x10_SRGB_BLOCK: + case VULKAN_FORMAT_ASTC_12x12_UNORM_BLOCK: + case VULKAN_FORMAT_ASTC_12x12_SRGB_BLOCK: + ASSERT(0); + std::cout << "Unsupport texture format"; + } + } + + return formatList; +} + +uint32_t getVulkanFormatElementSize(VulkanFormat format) +{ + switch (format) + { + case VULKAN_FORMAT_R8_UINT: return uint32_t(1); + case VULKAN_FORMAT_R8_SINT: return uint32_t(1); + case VULKAN_FORMAT_R8G8_UINT: return uint32_t(2); + case VULKAN_FORMAT_R8G8_SINT: return uint32_t(2); + case VULKAN_FORMAT_R8G8B8A8_UINT: return uint32_t(4); + case VULKAN_FORMAT_R8G8B8A8_SINT: return uint32_t(4); + case VULKAN_FORMAT_R16_UINT: return 
uint32_t(2); + case VULKAN_FORMAT_R16_SINT: return uint32_t(2); + case VULKAN_FORMAT_R16G16_UINT: return uint32_t(4); + case VULKAN_FORMAT_R16G16_SINT: return uint32_t(4); + case VULKAN_FORMAT_R16G16B16A16_UINT: return uint32_t(8); + case VULKAN_FORMAT_R16G16B16A16_SINT: return uint32_t(8); + case VULKAN_FORMAT_R32_UINT: return uint32_t(4); + case VULKAN_FORMAT_R32_SINT: return uint32_t(4); + case VULKAN_FORMAT_R32_SFLOAT: return uint32_t(4); + case VULKAN_FORMAT_R32G32_UINT: return uint32_t(8); + case VULKAN_FORMAT_R32G32_SINT: return uint32_t(8); + case VULKAN_FORMAT_R32G32_SFLOAT: return uint32_t(8); + case VULKAN_FORMAT_R32G32B32A32_UINT: return uint32_t(16); + case VULKAN_FORMAT_R32G32B32A32_SINT: return uint32_t(16); + case VULKAN_FORMAT_R32G32B32A32_SFLOAT: return uint32_t(16); + default: ASSERT(0); std::cout << "Unknown format"; + } + + return uint32_t(0); +} + +const char *getVulkanFormatGLSLFormat(VulkanFormat format) +{ + switch (format) + { + case VULKAN_FORMAT_R8_UINT: return "r8ui"; + case VULKAN_FORMAT_R8_SINT: return "r8i"; + case VULKAN_FORMAT_R8G8_UINT: return "rg8ui"; + case VULKAN_FORMAT_R8G8_SINT: return "rg8i"; + case VULKAN_FORMAT_R8G8B8A8_UINT: return "rgba8ui"; + case VULKAN_FORMAT_R8G8B8A8_SINT: return "rgba8i"; + case VULKAN_FORMAT_R16_UINT: return "r16ui"; + case VULKAN_FORMAT_R16_SINT: return "r16i"; + case VULKAN_FORMAT_R16G16_UINT: return "rg16ui"; + case VULKAN_FORMAT_R16G16_SINT: return "rg16i"; + case VULKAN_FORMAT_R16G16B16A16_UINT: return "rgba16ui"; + case VULKAN_FORMAT_R16G16B16A16_SINT: return "rgba16i"; + case VULKAN_FORMAT_R32_UINT: return "r32ui"; + case VULKAN_FORMAT_R32_SINT: return "r32i"; + case VULKAN_FORMAT_R32_SFLOAT: return "r32f"; + case VULKAN_FORMAT_R32G32_UINT: return "rg32ui"; + case VULKAN_FORMAT_R32G32_SINT: return "rg32i"; + case VULKAN_FORMAT_R32G32_SFLOAT: return "rg32f"; + case VULKAN_FORMAT_R32G32B32A32_UINT: return "rgba32ui"; + case VULKAN_FORMAT_R32G32B32A32_SINT: return "rgba32i"; + case 
VULKAN_FORMAT_R32G32B32A32_SFLOAT: return "rgba32f"; + default: ASSERT(0); std::cout << "Unknown format"; + } + + return (const char *)size_t(0); +} + +const char *getVulkanFormatGLSLTypePrefix(VulkanFormat format) +{ + switch (format) + { + case VULKAN_FORMAT_R8_UINT: + case VULKAN_FORMAT_R8G8_UINT: + case VULKAN_FORMAT_R8G8B8A8_UINT: + case VULKAN_FORMAT_R16_UINT: + case VULKAN_FORMAT_R16G16_UINT: + case VULKAN_FORMAT_R16G16B16A16_UINT: + case VULKAN_FORMAT_R32_UINT: + case VULKAN_FORMAT_R32G32_UINT: + case VULKAN_FORMAT_R32G32B32A32_UINT: return "u"; + + case VULKAN_FORMAT_R8_SINT: + case VULKAN_FORMAT_R8G8_SINT: + case VULKAN_FORMAT_R8G8B8A8_SINT: + case VULKAN_FORMAT_R16_SINT: + case VULKAN_FORMAT_R16G16_SINT: + case VULKAN_FORMAT_R16G16B16A16_SINT: + case VULKAN_FORMAT_R32_SINT: + case VULKAN_FORMAT_R32G32_SINT: + case VULKAN_FORMAT_R32G32B32A32_SINT: return "i"; + + case VULKAN_FORMAT_R32_SFLOAT: + case VULKAN_FORMAT_R32G32_SFLOAT: + case VULKAN_FORMAT_R32G32B32A32_SFLOAT: return ""; + + default: ASSERT(0); std::cout << "Unknown format"; + } + + return ""; +} + +std::string prepareVulkanShader( + std::string shaderCode, + const std::map &patternToSubstituteMap) +{ + for (std::map::const_iterator psIt = + patternToSubstituteMap.begin(); + psIt != patternToSubstituteMap.end(); ++psIt) + { + std::string::size_type pos = 0u; + while ((pos = shaderCode.find(psIt->first, pos)) != std::string::npos) + { + shaderCode.replace(pos, psIt->first.length(), psIt->second); + pos += psIt->second.length(); + } + } + + return shaderCode; +} + +std::ostream &operator<<(std::ostream &os, + VulkanMemoryTypeProperty memoryTypeProperty) +{ + switch (memoryTypeProperty) + { + case VULKAN_MEMORY_TYPE_PROPERTY_NONE: return os << "None"; + case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL: + return os << "Device local"; + case VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT: + return os << "Host visible and coherent"; + case VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED: + return os 
<< "Host visible and cached"; + case VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED_COHERENT: + return os << "Host visible, cached and coherent"; + case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_COHERENT: + return os << "Device local, Host visible and coherent"; + case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED: + return os << "Device local, Host visible and cached"; + case VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED_COHERENT: + return os << "Device local, Host visible, cached and coherent"; + } + + return os; +} + +std::ostream & +operator<<(std::ostream &os, + VulkanExternalMemoryHandleType externalMemoryHandleType) +{ + switch (externalMemoryHandleType) + { + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE: return os << "None"; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD: + return os << "Opaque file descriptor"; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT: + return os << "Opaque NT handle"; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT: + return os << "Opaque D3DKMT handle"; + case VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT: + return os << "Opaque NT and D3DKMT handle"; + } + + return os; +} + +std::ostream & +operator<<(std::ostream &os, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) +{ + switch (externalSemaphoreHandleType) + { + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE: return os << "None"; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD: + return os << "Opaque file descriptor"; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT: + return os << "Opaque NT handle"; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT: + return os << "Opaque D3DKMT handle"; + case VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT: + return os << "Opaque NT and D3DKMT handle"; + } + + return os; +} + +std::ostream &operator<<(std::ostream &os, VulkanFormat format) +{ + switch (format) + { + case VULKAN_FORMAT_R8_UINT: return os << 
"R8_UINT"; + case VULKAN_FORMAT_R8_SINT: return os << "R8_SINT"; + case VULKAN_FORMAT_R8G8_UINT: return os << "R8G8_UINT"; + case VULKAN_FORMAT_R8G8_SINT: return os << "R8G8_SINT"; + case VULKAN_FORMAT_R8G8B8A8_UINT: return os << "R8G8B8A8_UINT"; + case VULKAN_FORMAT_R8G8B8A8_SINT: return os << "R8G8B8A8_SINT"; + case VULKAN_FORMAT_R16_UINT: return os << "R16_UINT"; + case VULKAN_FORMAT_R16_SINT: return os << "R16_SINT"; + case VULKAN_FORMAT_R16G16_UINT: return os << "R16G16_UINT"; + case VULKAN_FORMAT_R16G16_SINT: return os << "R16G16_SINT"; + case VULKAN_FORMAT_R16G16B16A16_UINT: return os << "R16G16B16A16_UINT"; + case VULKAN_FORMAT_R16G16B16A16_SINT: return os << "R16G16B16A16_SINT"; + case VULKAN_FORMAT_R32_UINT: return os << "R32_UINT"; + case VULKAN_FORMAT_R32_SINT: return os << "R32_SINT"; + case VULKAN_FORMAT_R32_SFLOAT: return os << "R32_SFLOAT"; + case VULKAN_FORMAT_R32G32_UINT: return os << "R32G32_UINT"; + case VULKAN_FORMAT_R32G32_SINT: return os << "R32G32_SINT"; + case VULKAN_FORMAT_R32G32_SFLOAT: return os << "R32G32_SFLOAT"; + case VULKAN_FORMAT_R32G32B32A32_UINT: return os << "R32G32B32A32_UINT"; + case VULKAN_FORMAT_R32G32B32A32_SINT: return os << "R32G32B32A32_SINT"; + case VULKAN_FORMAT_R32G32B32A32_SFLOAT: + return os << "R32G32B32A32_SFLOAT"; + break; + default: ASSERT(0); std::cout << "Unknown format"; + } + + return os; +} diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp new file mode 100644 index 00000000..7022fd5a --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp @@ -0,0 +1,69 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef _vulkan_utility_hpp_ +#define _vulkan_utility_hpp_ + +#include "vulkan_wrapper_types.hpp" +#include +#include +#include +#include +#include "../../../test_common/harness/testHarness.h" + +#define STRING_(str) #str +#define STRING(str) STRING_(str) + +#define ROUND_UP(n, multiple) \ + (((n) + (multiple)-1) - ((((n) + (multiple)-1)) % (multiple))) + +const VulkanInstance& getVulkanInstance(); +const VulkanPhysicalDevice& getVulkanPhysicalDevice(); +const VulkanQueueFamily& +getVulkanQueueFamily(uint32_t queueFlags = VULKAN_QUEUE_FLAG_MASK_ALL); +const VulkanMemoryType& +getVulkanMemoryType(const VulkanDevice& device, + VulkanMemoryTypeProperty memoryTypeProperty); +bool checkVkSupport(); +const VulkanQueueFamilyList& getEmptyVulkanQueueFamilyList(); +const VulkanDescriptorSetLayoutList& getEmptyVulkanDescriptorSetLayoutList(); +const VulkanQueueFamilyToQueueCountMap& +getDefaultVulkanQueueFamilyToQueueCountMap(); +const std::vector +getSupportedVulkanExternalMemoryHandleTypeList(); +const std::vector +getSupportedVulkanExternalSemaphoreHandleTypeList(); +const std::vector getSupportedVulkanFormatList(); + +uint32_t getVulkanFormatElementSize(VulkanFormat format); +const char* getVulkanFormatGLSLFormat(VulkanFormat format); +const char* getVulkanFormatGLSLTypePrefix(VulkanFormat format); + +std::string prepareVulkanShader( + std::string shaderCode, + const std::map& patternToSubstituteMap); + +std::ostream& operator<<(std::ostream& os, + VulkanMemoryTypeProperty memoryTypeProperty); +std::ostream& +operator<<(std::ostream& os, + 
VulkanExternalMemoryHandleType externalMemoryHandleType); +std::ostream& +operator<<(std::ostream& os, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType); +std::ostream& operator<<(std::ostream& os, VulkanFormat format); + +#endif // _vulkan_utility_hpp_ diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp new file mode 100644 index 00000000..c044e009 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp @@ -0,0 +1,2075 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifdef _WIN32 +#define NOMINMAX +#include +#include +#include +#endif +#include +#include "vulkan_wrapper.hpp" +#if defined(__linux__) && !defined(__ANDROID__) +#include +#include +#elif defined(__ANDROID__) +#include +#endif +#if defined _WIN32 +#define LoadFunction GetProcAddress +#elif defined __linux +#define LoadFunction dlsym +#endif + +extern "C" { +#define VK_FUNC_DECL(name) PFN_##name _##name = NULL; +VK_FUNC_LIST +#if defined(_WIN32) || defined(_WIN64) +VK_WINDOWS_FUNC_LIST +#endif +#undef VK_FUNC_DECL +} + +#define WAIVED 2 +#define HANDLE_ERROR -1 + +#define CHECK_VK(call) \ + if (call != VK_SUCCESS) return call; +/////////////////////////////////// +// VulkanInstance implementation // +/////////////////////////////////// + +VulkanInstance::VulkanInstance(const VulkanInstance &instance) + : m_vkInstance(instance.m_vkInstance), + m_physicalDeviceList(instance.m_physicalDeviceList) +{} + +VulkanInstance::VulkanInstance(): m_vkInstance(VK_NULL_HANDLE) +{ +#if defined(__linux__) && !defined(__ANDROID__) + char *glibcVersion = strdup(gnu_get_libc_version()); + int majNum = (int)atoi(strtok(glibcVersion, ".")); + int minNum = (int)atoi(strtok(NULL, ".")); + free(glibcVersion); + if ((majNum < 2) || (majNum == 2 && minNum < 17)) + { + // WAIVE_TEST() << "Insufficient GLIBC version. 
Test waived!"; + } +#endif + +#if defined(_WIN32) || defined(_WIN64) + const char *vulkanLoaderLibraryName = "vulkan-1.dll"; +#elif defined(__linux__) + const char *vulkanLoaderLibraryName = "libvulkan.so.1"; +#endif +#ifdef _WIN32 + HINSTANCE hDLL; + hDLL = LoadLibrary(vulkanLoaderLibraryName); + if (hDLL == NULL) + { + throw std::runtime_error("LoadLibrary failed!"); + } + vkGetInstanceProcAddr = + (PFN_vkGetInstanceProcAddr)LoadFunction(hDLL, "vkGetInstanceProcAddr"); +#else +#if !defined(__APPLE__) + void *handle; + handle = dlopen(vulkanLoaderLibraryName, RTLD_LAZY); + if (!handle) + { + fputs(dlerror(), stderr); + throw std::runtime_error("dlopen failed !!!"); + } + vkGetInstanceProcAddr = (PFN_vkGetInstanceProcAddr)LoadFunction( + handle, "vkGetInstanceProcAddr"); +#endif +#endif + if ((unsigned long long)vkGetInstanceProcAddr == (unsigned long long)NULL) + { + throw std::runtime_error("vkGetInstanceProcAddr() not found!"); + } +#define VK_GET_NULL_INSTANCE_PROC_ADDR(name) \ + _##name = (PFN_##name)vkGetInstanceProcAddr(NULL, #name); + + if ((unsigned long long)vkGetInstanceProcAddr == (unsigned long long)NULL) + { + throw std::runtime_error("Couldn't obtain address for function"); + } + VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceExtensionProperties); + uint32_t instanceExtensionPropertiesCount; + VkResult vkStatus = VK_SUCCESS; + vkStatus = vkEnumerateInstanceExtensionProperties( + NULL, &instanceExtensionPropertiesCount, NULL); + // Something went wrong in vulkan initialization (most likely incompatible + // device/driver combination) + if (vkStatus == VK_ERROR_INCOMPATIBLE_DRIVER) + { + throw std::runtime_error( + "Waiving vulkan test because " + "vkEnumerateInstanceExtensionProperties failed."); + // return WAIVED; + } + + VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceVersion); + VK_GET_NULL_INSTANCE_PROC_ADDR(vkEnumerateInstanceLayerProperties); + VK_GET_NULL_INSTANCE_PROC_ADDR(vkCreateInstance); +#undef VK_GET_NULL_INSTANCE_PROC_ADDR + + 
VkApplicationInfo vkApplicationInfo = {}; + vkApplicationInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + vkApplicationInfo.pNext = NULL; + vkApplicationInfo.pApplicationName = "Default app"; + vkApplicationInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0); + vkApplicationInfo.pEngineName = "No engine"; + vkApplicationInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0); + vkApplicationInfo.apiVersion = VK_API_VERSION_1_0; + + std::vector enabledExtensionNameList; + enabledExtensionNameList.push_back( + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME); + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME); + + std::vector vkExtensionPropertiesList( + instanceExtensionPropertiesCount); + vkEnumerateInstanceExtensionProperties(NULL, + &instanceExtensionPropertiesCount, + vkExtensionPropertiesList.data()); + + for (size_t eenIdx = 0; eenIdx < enabledExtensionNameList.size(); eenIdx++) + { + bool isSupported = false; + for (size_t epIdx = 0; epIdx < vkExtensionPropertiesList.size(); + epIdx++) + { + if (!strcmp(enabledExtensionNameList[eenIdx], + vkExtensionPropertiesList[epIdx].extensionName)) + { + isSupported = true; + break; + } + } + if (!isSupported) + { + return; + } + } + + VkInstanceCreateInfo vkInstanceCreateInfo = {}; + vkInstanceCreateInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + vkInstanceCreateInfo.pNext = NULL; + vkInstanceCreateInfo.flags = 0; + vkInstanceCreateInfo.pApplicationInfo = &vkApplicationInfo; + vkInstanceCreateInfo.enabledLayerCount = 0; + vkInstanceCreateInfo.ppEnabledLayerNames = NULL; + vkInstanceCreateInfo.enabledExtensionCount = + (uint32_t)enabledExtensionNameList.size(); + vkInstanceCreateInfo.ppEnabledExtensionNames = + enabledExtensionNameList.data(); + + vkCreateInstance(&vkInstanceCreateInfo, NULL, &m_vkInstance); + +#define VK_FUNC_DECL(name) \ + _##name = 
(PFN_##name)vkGetInstanceProcAddr(m_vkInstance, #name); \ + // ASSERT_NEQ((unsigned long long)name, 0ULL) << "Couldn't obtain address + // for function" << #name; + + VK_FUNC_LIST +#if defined(_WIN32) || defined(_WIN64) + VK_WINDOWS_FUNC_LIST +#endif +#undef VK_FUNC_DECL + + uint32_t physicalDeviceCount = 0; + vkEnumeratePhysicalDevices(m_vkInstance, &physicalDeviceCount, NULL); + // CHECK_NEQ(physicalDeviceCount, uint32_t(0)); + + if (physicalDeviceCount == uint32_t(0)) + { + throw std::runtime_error("failed to find GPUs with Vulkan support!"); + } + + std::vector vkPhysicalDeviceList(physicalDeviceCount, + VK_NULL_HANDLE); + vkEnumeratePhysicalDevices(m_vkInstance, &physicalDeviceCount, + vkPhysicalDeviceList.data()); + + for (size_t ppdIdx = 0; ppdIdx < vkPhysicalDeviceList.size(); ppdIdx++) + { + VulkanPhysicalDevice *physicalDevice = + new VulkanPhysicalDevice(vkPhysicalDeviceList[ppdIdx]); + m_physicalDeviceList.add(*physicalDevice); + } +} + +VulkanInstance::~VulkanInstance() +{ + for (size_t pdIdx = 0; pdIdx < m_physicalDeviceList.size(); pdIdx++) + { + const VulkanPhysicalDevice &physicalDevice = + m_physicalDeviceList[pdIdx]; + delete &physicalDevice; + } + if (m_vkInstance) + { + vkDestroyInstance(m_vkInstance, NULL); + } +} + +const VulkanPhysicalDeviceList &VulkanInstance::getPhysicalDeviceList() const +{ + return m_physicalDeviceList; +} + +VulkanInstance::operator VkInstance() const { return m_vkInstance; } + +///////////////////////////////////////// +// VulkanPhysicalDevice implementation // +///////////////////////////////////////// + +VulkanPhysicalDevice::VulkanPhysicalDevice( + const VulkanPhysicalDevice &physicalDevice) + : m_vkPhysicalDevice(physicalDevice.m_vkPhysicalDevice), + m_vkPhysicalDeviceProperties(physicalDevice.m_vkPhysicalDeviceProperties), + m_vkDeviceNodeMask(physicalDevice.m_vkDeviceNodeMask), + m_vkPhysicalDeviceFeatures(physicalDevice.m_vkPhysicalDeviceFeatures), + m_vkPhysicalDeviceMemoryProperties( + 
physicalDevice.m_vkPhysicalDeviceMemoryProperties), + m_queueFamilyList(physicalDevice.m_queueFamilyList) +{ + memcpy(m_vkDeviceUUID, physicalDevice.m_vkDeviceUUID, VK_UUID_SIZE); +} + +VulkanPhysicalDevice::VulkanPhysicalDevice(VkPhysicalDevice vkPhysicalDevice) + : m_vkPhysicalDevice(vkPhysicalDevice) +{ + if (m_vkPhysicalDevice == (VkPhysicalDevice)VK_NULL_HANDLE) + { + throw std::runtime_error("failed to find a suitable GPU!"); + } + + vkGetPhysicalDeviceProperties(m_vkPhysicalDevice, + &m_vkPhysicalDeviceProperties); + vkGetPhysicalDeviceFeatures(m_vkPhysicalDevice, + &m_vkPhysicalDeviceFeatures); + + VkPhysicalDeviceIDPropertiesKHR vkPhysicalDeviceIDPropertiesKHR = {}; + vkPhysicalDeviceIDPropertiesKHR.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR; + vkPhysicalDeviceIDPropertiesKHR.pNext = NULL; + + VkPhysicalDeviceProperties2KHR vkPhysicalDeviceProperties2KHR = {}; + vkPhysicalDeviceProperties2KHR.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + vkPhysicalDeviceProperties2KHR.pNext = &vkPhysicalDeviceIDPropertiesKHR; + + vkGetPhysicalDeviceProperties2KHR(m_vkPhysicalDevice, + &vkPhysicalDeviceProperties2KHR); + + memcpy(m_vkDeviceUUID, vkPhysicalDeviceIDPropertiesKHR.deviceUUID, + sizeof(m_vkDeviceUUID)); + memcpy(m_vkDeviceLUID, vkPhysicalDeviceIDPropertiesKHR.deviceLUID, + sizeof(m_vkDeviceLUID)); + m_vkDeviceNodeMask = vkPhysicalDeviceIDPropertiesKHR.deviceNodeMask; + + uint32_t queueFamilyCount = 0; + vkGetPhysicalDeviceQueueFamilyProperties(m_vkPhysicalDevice, + &queueFamilyCount, NULL); + + std::vector vkQueueFamilyPropertiesList( + queueFamilyCount); + vkGetPhysicalDeviceQueueFamilyProperties( + m_vkPhysicalDevice, &queueFamilyCount, + vkQueueFamilyPropertiesList.data()); + + for (size_t qfpIdx = 0; qfpIdx < vkQueueFamilyPropertiesList.size(); + qfpIdx++) + { + VulkanQueueFamily *queueFamily = new VulkanQueueFamily( + uint32_t(qfpIdx), vkQueueFamilyPropertiesList[qfpIdx]); + m_queueFamilyList.add(*queueFamily); + } + + 
vkGetPhysicalDeviceMemoryProperties(m_vkPhysicalDevice, + &m_vkPhysicalDeviceMemoryProperties); + + for (uint32_t mhIdx = 0; + mhIdx < m_vkPhysicalDeviceMemoryProperties.memoryHeapCount; mhIdx++) + { + VulkanMemoryHeap *memoryHeap = new VulkanMemoryHeap( + mhIdx, m_vkPhysicalDeviceMemoryProperties.memoryHeaps[mhIdx].size, + (VulkanMemoryHeapFlag)m_vkPhysicalDeviceMemoryProperties + .memoryHeaps[mhIdx] + .flags); + m_memoryHeapList.add(*memoryHeap); + } + + for (uint32_t mtIdx = 0; + mtIdx < m_vkPhysicalDeviceMemoryProperties.memoryTypeCount; mtIdx++) + { + const VulkanMemoryHeap &memoryHeap = m_memoryHeapList + [m_vkPhysicalDeviceMemoryProperties.memoryTypes[mtIdx].heapIndex]; + VulkanMemoryType *memoryType = new VulkanMemoryType( + mtIdx, + (VulkanMemoryTypeProperty)m_vkPhysicalDeviceMemoryProperties + .memoryTypes[mtIdx] + .propertyFlags, + memoryHeap); + m_memoryTypeList.add(*memoryType); + } +} + +VulkanPhysicalDevice::~VulkanPhysicalDevice() +{ + for (size_t mtIdx = 0; mtIdx < m_memoryTypeList.size(); mtIdx++) + { + const VulkanMemoryType &memoryType = m_memoryTypeList[mtIdx]; + delete &memoryType; + } + + for (size_t mhIdx = 0; mhIdx < m_memoryHeapList.size(); mhIdx++) + { + const VulkanMemoryHeap &memoryHeap = m_memoryHeapList[mhIdx]; + delete &memoryHeap; + } + + for (size_t qfIdx = 0; qfIdx < m_queueFamilyList.size(); qfIdx++) + { + const VulkanQueueFamily &queueFamily = m_queueFamilyList[qfIdx]; + delete &queueFamily; + } +} + + +const VulkanQueueFamilyList &VulkanPhysicalDevice::getQueueFamilyList() const +{ + return m_queueFamilyList; +} + +const VulkanMemoryHeapList &VulkanPhysicalDevice::getMemoryHeapList() const +{ + return m_memoryHeapList; +} + +const VulkanMemoryTypeList &VulkanPhysicalDevice::getMemoryTypeList() const +{ + return m_memoryTypeList; +} + +const uint8_t *VulkanPhysicalDevice::getUUID() const { return m_vkDeviceUUID; } + +const uint8_t *VulkanPhysicalDevice::getLUID() const { return m_vkDeviceLUID; } + +uint32_t 
VulkanPhysicalDevice::getNodeMask() const +{ + return m_vkDeviceNodeMask; +} + +VulkanPhysicalDevice::operator VkPhysicalDevice() const +{ + return m_vkPhysicalDevice; +} + +bool operator<(const VulkanQueueFamily &queueFamilyA, + const VulkanQueueFamily &queueFamilyB) +{ + return (uint32_t)queueFamilyA < (uint32_t)queueFamilyB; +} + +///////////////////////////////////// +// VulkanMemoryHeap implementation // +///////////////////////////////////// + +VulkanMemoryHeap::VulkanMemoryHeap(const VulkanMemoryHeap &memoryHeap) + : m_memoryHeapIndex(memoryHeap.m_memoryHeapIndex), + m_size(memoryHeap.m_size), m_memoryHeapFlag(memoryHeap.m_memoryHeapFlag) +{} + +VulkanMemoryHeap::VulkanMemoryHeap(uint32_t memoryHeapIndex, uint64_t size, + VulkanMemoryHeapFlag memoryHeapFlag) + : m_memoryHeapIndex(memoryHeapIndex), m_size(size), + m_memoryHeapFlag(memoryHeapFlag) +{} + +VulkanMemoryHeap::~VulkanMemoryHeap() {} + +uint64_t VulkanMemoryHeap::getSize() const { return m_size; } + + +VulkanMemoryHeapFlag VulkanMemoryHeap::getMemoryHeapFlag() const +{ + return m_memoryHeapFlag; +} + +VulkanMemoryHeap::operator uint32_t() const { return m_memoryHeapIndex; } + +///////////////////////////////////// +// VulkanMemoryType implementation // +///////////////////////////////////// + +VulkanMemoryType::VulkanMemoryType(const VulkanMemoryType &memoryType) + : m_memoryTypeIndex(memoryType.m_memoryTypeIndex), + m_memoryTypeProperty(memoryType.m_memoryTypeProperty), + m_memoryHeap(memoryType.m_memoryHeap) +{} + +VulkanMemoryType::VulkanMemoryType(uint32_t memoryTypeIndex, + VulkanMemoryTypeProperty memoryTypeProperty, + const VulkanMemoryHeap &memoryHeap) + : m_memoryTypeIndex(memoryTypeIndex), + m_memoryTypeProperty(memoryTypeProperty), m_memoryHeap(memoryHeap) +{} + +VulkanMemoryType::~VulkanMemoryType() {} + +VulkanMemoryTypeProperty VulkanMemoryType::getMemoryTypeProperty() const +{ + return m_memoryTypeProperty; +} + +const VulkanMemoryHeap &VulkanMemoryType::getMemoryHeap() const +{ + 
return m_memoryHeap; +} + +VulkanMemoryType::operator uint32_t() const { return m_memoryTypeIndex; } + +////////////////////////////////////// +// VulkanQueueFamily implementation // +////////////////////////////////////// + +VulkanQueueFamily::VulkanQueueFamily(const VulkanQueueFamily &queueFamily) + : m_queueFamilyIndex(queueFamily.m_queueFamilyIndex), + m_vkQueueFamilyProperties(queueFamily.m_vkQueueFamilyProperties) +{} + +VulkanQueueFamily::VulkanQueueFamily( + uint32_t queueFamilyIndex, VkQueueFamilyProperties vkQueueFamilyProperties) + : m_queueFamilyIndex(queueFamilyIndex), + m_vkQueueFamilyProperties(vkQueueFamilyProperties) +{} + +VulkanQueueFamily::~VulkanQueueFamily() {} + +uint32_t VulkanQueueFamily::getQueueFlags() const +{ + return m_vkQueueFamilyProperties.queueFlags + & (uint32_t)VULKAN_QUEUE_FLAG_MASK_ALL; +} + +uint32_t VulkanQueueFamily::getQueueCount() const +{ + return m_vkQueueFamilyProperties.queueCount; +} + +VulkanQueueFamily::operator uint32_t() const { return m_queueFamilyIndex; } + +///////////////////////////////// +// VulkanDevice implementation // +///////////////////////////////// + +VulkanDevice::VulkanDevice(const VulkanDevice &device) + : m_physicalDevice(device.m_physicalDevice), m_vkDevice(device.m_vkDevice) +{} + +VulkanDevice::VulkanDevice( + const VulkanPhysicalDevice &physicalDevice, + const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap) + : m_physicalDevice(physicalDevice), m_vkDevice(NULL) +{ + uint32_t maxQueueCount = 0; + for (uint32_t qfIdx = 0; + qfIdx < (uint32_t)physicalDevice.getQueueFamilyList().size(); qfIdx++) + { + maxQueueCount = + std::max(maxQueueCount, queueFamilyToQueueCountMap[qfIdx]); + } + + std::vector vkDeviceQueueCreateInfoList; + std::vector queuePriorities(maxQueueCount); + for (uint32_t qfIdx = 0; + qfIdx < (uint32_t)physicalDevice.getQueueFamilyList().size(); qfIdx++) + { + if (queueFamilyToQueueCountMap[qfIdx]) + { + VkDeviceQueueCreateInfo vkDeviceQueueCreateInfo = {}; + 
vkDeviceQueueCreateInfo.sType = + VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + vkDeviceQueueCreateInfo.pNext = NULL; + vkDeviceQueueCreateInfo.flags = 0; + vkDeviceQueueCreateInfo.queueFamilyIndex = qfIdx; + vkDeviceQueueCreateInfo.queueCount = + queueFamilyToQueueCountMap[qfIdx]; + vkDeviceQueueCreateInfo.pQueuePriorities = queuePriorities.data(); + + vkDeviceQueueCreateInfoList.push_back(vkDeviceQueueCreateInfo); + } + } + + std::vector enabledExtensionNameList; + enabledExtensionNameList.push_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME); + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME); +#if defined(_WIN32) || defined(_WIN64) + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME); + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME); +#else + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME); + enabledExtensionNameList.push_back( + VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME); +#endif + + + VkDeviceCreateInfo vkDeviceCreateInfo = {}; + vkDeviceCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + vkDeviceCreateInfo.pNext = NULL; + vkDeviceCreateInfo.flags = 0; + vkDeviceCreateInfo.queueCreateInfoCount = + (uint32_t)vkDeviceQueueCreateInfoList.size(); + vkDeviceCreateInfo.pQueueCreateInfos = vkDeviceQueueCreateInfoList.data(); + vkDeviceCreateInfo.enabledLayerCount = 0; + vkDeviceCreateInfo.ppEnabledLayerNames = NULL; + vkDeviceCreateInfo.enabledExtensionCount = + (uint32_t)enabledExtensionNameList.size(); + vkDeviceCreateInfo.ppEnabledExtensionNames = + enabledExtensionNameList.data(); + vkDeviceCreateInfo.pEnabledFeatures = NULL; + + vkCreateDevice(physicalDevice, &vkDeviceCreateInfo, NULL, &m_vkDevice); + + for (uint32_t qfIdx = 0; + qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size(); + qfIdx++) + { + VulkanQueueList *queueList = new VulkanQueueList(); + m_queueFamilyIndexToQueueListMap.insert(qfIdx, *queueList); + 
for (uint32_t qIdx = 0; qIdx < queueFamilyToQueueCountMap[qfIdx]; + qIdx++) + { + VkQueue vkQueue; + vkGetDeviceQueue(m_vkDevice, qfIdx, qIdx, &vkQueue); + VulkanQueue *queue = new VulkanQueue(vkQueue); + m_queueFamilyIndexToQueueListMap[qfIdx].add(*queue); + } + } +} + +VulkanDevice::~VulkanDevice() +{ + for (uint32_t qfIdx = 0; + qfIdx < (uint32_t)m_physicalDevice.getQueueFamilyList().size(); + qfIdx++) + { + for (size_t qIdx = 0; + qIdx < m_queueFamilyIndexToQueueListMap[qfIdx].size(); qIdx++) + { + VulkanQueue &queue = m_queueFamilyIndexToQueueListMap[qfIdx][qIdx]; + delete &queue; + } + VulkanQueueList &queueList = m_queueFamilyIndexToQueueListMap[qfIdx]; + delete &queueList; + } + vkDestroyDevice(m_vkDevice, NULL); +} + +const VulkanPhysicalDevice &VulkanDevice::getPhysicalDevice() const +{ + return m_physicalDevice; +} + +VulkanQueue &VulkanDevice::getQueue(const VulkanQueueFamily &queueFamily, + uint32_t queueIndex) +{ + return m_queueFamilyIndexToQueueListMap[queueFamily][queueIndex]; +} + +VulkanDevice::operator VkDevice() const { return m_vkDevice; } + +//////////////////////////////// +// VulkanQueue implementation // +//////////////////////////////// + +VulkanQueue::VulkanQueue(const VulkanQueue &queue): m_vkQueue(queue.m_vkQueue) +{} + +VulkanQueue::VulkanQueue(VkQueue vkQueue): m_vkQueue(vkQueue) {} + +VulkanQueue::~VulkanQueue() {} + +void VulkanQueue::submit(const VulkanSemaphoreList &waitSemaphoreList, + const VulkanCommandBufferList &commandBufferList, + const VulkanSemaphoreList &signalSemaphoreList) +{ + std::vector vkPipelineStageFlagsList( + waitSemaphoreList.size(), VK_PIPELINE_STAGE_ALL_COMMANDS_BIT); + + VkSubmitInfo vkSubmitInfo = {}; + vkSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + vkSubmitInfo.pNext = NULL; + vkSubmitInfo.waitSemaphoreCount = (uint32_t)waitSemaphoreList.size(); + vkSubmitInfo.pWaitSemaphores = waitSemaphoreList; + vkSubmitInfo.pWaitDstStageMask = vkPipelineStageFlagsList.data(); + vkSubmitInfo.commandBufferCount 
= (uint32_t)commandBufferList.size(); + vkSubmitInfo.pCommandBuffers = commandBufferList; + vkSubmitInfo.signalSemaphoreCount = (uint32_t)signalSemaphoreList.size(); + vkSubmitInfo.pSignalSemaphores = signalSemaphoreList; + + vkQueueSubmit(m_vkQueue, 1, &vkSubmitInfo, NULL); +} + +void VulkanQueue::submit(const VulkanSemaphore &waitSemaphore, + const VulkanCommandBuffer &commandBuffer, + const VulkanSemaphore &signalSemaphore) +{ + VulkanSemaphoreList waitSemaphoreList; + VulkanCommandBufferList commandBufferList; + VulkanSemaphoreList signalSemaphoreList; + + waitSemaphoreList.add(waitSemaphore); + commandBufferList.add(commandBuffer); + signalSemaphoreList.add(signalSemaphore); + + submit(waitSemaphoreList, commandBufferList, signalSemaphoreList); +} + +void VulkanQueue::submit(const VulkanCommandBuffer &commandBuffer, + const VulkanSemaphore &signalSemaphore) +{ + VulkanSemaphoreList waitSemaphoreList; + VulkanCommandBufferList commandBufferList; + VulkanSemaphoreList signalSemaphoreList; + + commandBufferList.add(commandBuffer); + signalSemaphoreList.add(signalSemaphore); + + submit(waitSemaphoreList, commandBufferList, signalSemaphoreList); +} + +void VulkanQueue::submit(const VulkanCommandBuffer &commandBuffer) +{ + VulkanSemaphoreList waitSemaphoreList; + VulkanCommandBufferList commandBufferList; + VulkanSemaphoreList signalSemaphoreList; + + commandBufferList.add(commandBuffer); + + submit(waitSemaphoreList, commandBufferList, signalSemaphoreList); +} + +void VulkanQueue::waitIdle() { vkQueueWaitIdle(m_vkQueue); } + +VulkanQueue::operator VkQueue() const { return m_vkQueue; } + +///////////////////////////////////////////////////// +// VulkanDescriptorSetLayoutBinding implementation // +///////////////////////////////////////////////////// + +VulkanDescriptorSetLayoutBinding::VulkanDescriptorSetLayoutBinding( + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding) + : m_vkDescriptorSetLayoutBinding( + 
descriptorSetLayoutBinding.m_vkDescriptorSetLayoutBinding) +{} + +VulkanDescriptorSetLayoutBinding::VulkanDescriptorSetLayoutBinding( + uint32_t binding, VulkanDescriptorType descriptorType, + uint32_t descriptorCount, VulkanShaderStage shaderStage) +{ + m_vkDescriptorSetLayoutBinding.binding = binding; + m_vkDescriptorSetLayoutBinding.descriptorType = + (VkDescriptorType)descriptorType; + m_vkDescriptorSetLayoutBinding.descriptorCount = descriptorCount; + m_vkDescriptorSetLayoutBinding.stageFlags = + (VkShaderStageFlags)(VkShaderStageFlagBits)shaderStage; + m_vkDescriptorSetLayoutBinding.pImmutableSamplers = NULL; +} + +VulkanDescriptorSetLayoutBinding::~VulkanDescriptorSetLayoutBinding() {} + +VulkanDescriptorSetLayoutBinding::operator VkDescriptorSetLayoutBinding() const +{ + return m_vkDescriptorSetLayoutBinding; +} + +////////////////////////////////////////////// +// VulkanDescriptorSetLayout implementation // +////////////////////////////////////////////// + +VulkanDescriptorSetLayout::VulkanDescriptorSetLayout( + const VulkanDescriptorSetLayout &descriptorSetLayout) + : m_device(descriptorSetLayout.m_device), + m_vkDescriptorSetLayout(descriptorSetLayout.m_vkDescriptorSetLayout) +{} + +void VulkanDescriptorSetLayout::VulkanDescriptorSetLayoutCommon( + const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList) +{ + VkDescriptorSetLayoutCreateInfo vkDescriptorSetLayoutCreateInfo = {}; + vkDescriptorSetLayoutCreateInfo.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + vkDescriptorSetLayoutCreateInfo.pNext = NULL; + vkDescriptorSetLayoutCreateInfo.flags = 0; + vkDescriptorSetLayoutCreateInfo.bindingCount = + (uint32_t)descriptorSetLayoutBindingList.size(); + vkDescriptorSetLayoutCreateInfo.pBindings = descriptorSetLayoutBindingList; + + vkCreateDescriptorSetLayout(m_device, &vkDescriptorSetLayoutCreateInfo, + NULL, &m_vkDescriptorSetLayout); +} + +VulkanDescriptorSetLayout::VulkanDescriptorSetLayout( + const VulkanDevice &device, 
+ const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding) + : m_device(device), m_vkDescriptorSetLayout(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList; + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding); + + VulkanDescriptorSetLayoutCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorSetLayout::VulkanDescriptorSetLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1) + : m_device(device), m_vkDescriptorSetLayout(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList; + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding0); + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding1); + + VulkanDescriptorSetLayoutCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorSetLayout::VulkanDescriptorSetLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList) + : m_device(device), m_vkDescriptorSetLayout(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorSetLayout::~VulkanDescriptorSetLayout() +{ + if (m_vkDescriptorSetLayout != VK_NULL_HANDLE) + { + vkDestroyDescriptorSetLayout(m_device, m_vkDescriptorSetLayout, NULL); + } +} + +VulkanDescriptorSetLayout::operator VkDescriptorSetLayout() const +{ + return m_vkDescriptorSetLayout; +} + +///////////////////////////////////////// +// VulkanPipelineLayout implementation // +///////////////////////////////////////// + +VulkanPipelineLayout::VulkanPipelineLayout( + const VulkanPipelineLayout &pipelineLayout) + : m_device(pipelineLayout.m_device), + m_vkPipelineLayout(pipelineLayout.m_vkPipelineLayout) +{} + +void VulkanPipelineLayout::VulkanPipelineLayoutCommon( + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList) +{ + VkPipelineLayoutCreateInfo 
vkPipelineLayoutCreateInfo = {}; + vkPipelineLayoutCreateInfo.sType = + VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + vkPipelineLayoutCreateInfo.pNext = NULL; + vkPipelineLayoutCreateInfo.flags = 0; + vkPipelineLayoutCreateInfo.setLayoutCount = + (uint32_t)descriptorSetLayoutList.size(); + vkPipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayoutList; + vkPipelineLayoutCreateInfo.pushConstantRangeCount = 0; + vkPipelineLayoutCreateInfo.pPushConstantRanges = NULL; + + vkCreatePipelineLayout(m_device, &vkPipelineLayoutCreateInfo, NULL, + &m_vkPipelineLayout); +} + +VulkanPipelineLayout::VulkanPipelineLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayout &descriptorSetLayout) + : m_device(device), m_vkPipelineLayout(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutList descriptorSetLayoutList; + descriptorSetLayoutList.add(descriptorSetLayout); + + VulkanPipelineLayoutCommon(descriptorSetLayoutList); +} + +VulkanPipelineLayout::VulkanPipelineLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList) + : m_device(device), m_vkPipelineLayout(VK_NULL_HANDLE) +{ + VulkanPipelineLayoutCommon(descriptorSetLayoutList); +} + +VulkanPipelineLayout::~VulkanPipelineLayout() +{ + vkDestroyPipelineLayout(m_device, m_vkPipelineLayout, NULL); +} + +VulkanPipelineLayout::operator VkPipelineLayout() const +{ + return m_vkPipelineLayout; +} + +/////////////////////////////////////// +// VulkanShaderModule implementation // +/////////////////////////////////////// + +VulkanShaderModule::VulkanShaderModule(const VulkanShaderModule &shaderModule) + : m_device(shaderModule.m_device), + m_vkShaderModule(shaderModule.m_vkShaderModule) +{} + +VulkanShaderModule::VulkanShaderModule(const VulkanDevice &device, + const std::string &code) + : m_device(device) +{ + std::string paddedCode = code; + while (paddedCode.size() % 4) + { + paddedCode += " "; + } + + VkShaderModuleCreateInfo vkShaderModuleCreateInfo = {}; + 
vkShaderModuleCreateInfo.sType = + VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + vkShaderModuleCreateInfo.pNext = NULL; + vkShaderModuleCreateInfo.flags = 0; + vkShaderModuleCreateInfo.codeSize = paddedCode.size(); + vkShaderModuleCreateInfo.pCode = + (const uint32_t *)(void *)paddedCode.c_str(); + + vkCreateShaderModule(m_device, &vkShaderModuleCreateInfo, NULL, + &m_vkShaderModule); +} + +VulkanShaderModule::~VulkanShaderModule() +{ + vkDestroyShaderModule(m_device, m_vkShaderModule, NULL); +} + +VulkanShaderModule::operator VkShaderModule() const { return m_vkShaderModule; } + +/////////////////////////////////// +// VulkanPipeline implementation // +/////////////////////////////////// + +VulkanPipeline::VulkanPipeline(const VulkanPipeline &pipeline) + : m_device(pipeline.m_device), m_vkPipeline(pipeline.m_vkPipeline) +{} + +VulkanPipeline::VulkanPipeline(const VulkanDevice &device) + : m_device(device), m_vkPipeline(VK_NULL_HANDLE) +{} + +VulkanPipeline::~VulkanPipeline() +{ + vkDestroyPipeline(m_device, m_vkPipeline, NULL); +} + +VulkanPipeline::operator VkPipeline() const { return m_vkPipeline; } + +////////////////////////////////////////// +// VulkanComputePipeline implementation // +////////////////////////////////////////// + +VulkanComputePipeline::VulkanComputePipeline( + const VulkanComputePipeline &computePipeline) + : VulkanPipeline(computePipeline) +{} + +VulkanComputePipeline::VulkanComputePipeline( + const VulkanDevice &device, const VulkanPipelineLayout &pipelineLayout, + const VulkanShaderModule &shaderModule, const std::string &entryFuncName) + : VulkanPipeline(device) +{ + VkPipelineShaderStageCreateInfo vkPipelineShaderStageCreateInfo = {}; + vkPipelineShaderStageCreateInfo.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + vkPipelineShaderStageCreateInfo.pNext = NULL; + vkPipelineShaderStageCreateInfo.flags = 0; + vkPipelineShaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT; + vkPipelineShaderStageCreateInfo.module = 
shaderModule; + vkPipelineShaderStageCreateInfo.pName = entryFuncName.c_str(); + vkPipelineShaderStageCreateInfo.pSpecializationInfo = NULL; + + VkComputePipelineCreateInfo vkComputePipelineCreateInfo = {}; + vkComputePipelineCreateInfo.sType = + VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + vkComputePipelineCreateInfo.pNext = NULL; + vkComputePipelineCreateInfo.flags = 0; + vkComputePipelineCreateInfo.stage = vkPipelineShaderStageCreateInfo; + vkComputePipelineCreateInfo.layout = pipelineLayout; + vkComputePipelineCreateInfo.basePipelineHandle = VK_NULL_HANDLE; + vkComputePipelineCreateInfo.basePipelineIndex = 0; + + vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, + &vkComputePipelineCreateInfo, NULL, &m_vkPipeline); +} + +VulkanComputePipeline::~VulkanComputePipeline() {} + +VulkanPipelineBindPoint VulkanComputePipeline::getPipelineBindPoint() const +{ + return VULKAN_PIPELINE_BIND_POINT_COMPUTE; +} + +///////////////////////////////////////// +// VulkanDescriptorPool implementation // +///////////////////////////////////////// + +VulkanDescriptorPool::VulkanDescriptorPool( + const VulkanDescriptorPool &descriptorPool) + : m_device(descriptorPool.m_device), + m_vkDescriptorPool(descriptorPool.m_vkDescriptorPool) +{} + +void VulkanDescriptorPool::VulkanDescriptorPoolCommon( + const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList) +{ + if (descriptorSetLayoutBindingList.size()) + { + std::map + vkDescriptorTypeToDescriptorCountMap; + + for (size_t dslbIdx = 0; + dslbIdx < descriptorSetLayoutBindingList.size(); dslbIdx++) + { + VkDescriptorSetLayoutBinding vkDescriptorSetLayoutBinding = + descriptorSetLayoutBindingList[dslbIdx]; + if (vkDescriptorTypeToDescriptorCountMap.find( + vkDescriptorSetLayoutBinding.descriptorType) + == vkDescriptorTypeToDescriptorCountMap.end()) + { + vkDescriptorTypeToDescriptorCountMap + [vkDescriptorSetLayoutBinding.descriptorType] = 1; + } + else + { + vkDescriptorTypeToDescriptorCountMap + 
[vkDescriptorSetLayoutBinding.descriptorType]++; + } + } + + std::vector vkDescriptorPoolSizeList; + std::map::iterator dtdcIt; + for (dtdcIt = vkDescriptorTypeToDescriptorCountMap.begin(); + dtdcIt != vkDescriptorTypeToDescriptorCountMap.end(); ++dtdcIt) + { + VkDescriptorPoolSize vkDescriptorPoolSize = {}; + vkDescriptorPoolSize.type = dtdcIt->first; + vkDescriptorPoolSize.descriptorCount = dtdcIt->second; + + vkDescriptorPoolSizeList.push_back(vkDescriptorPoolSize); + } + + VkDescriptorPoolCreateInfo vkDescriptorPoolCreateInfo = {}; + vkDescriptorPoolCreateInfo.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + vkDescriptorPoolCreateInfo.pNext = NULL; + vkDescriptorPoolCreateInfo.flags = + VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; + vkDescriptorPoolCreateInfo.maxSets = 1; + vkDescriptorPoolCreateInfo.poolSizeCount = + (uint32_t)vkDescriptorPoolSizeList.size(); + vkDescriptorPoolCreateInfo.pPoolSizes = vkDescriptorPoolSizeList.data(); + + vkCreateDescriptorPool(m_device, &vkDescriptorPoolCreateInfo, NULL, + &m_vkDescriptorPool); + } +} + +VulkanDescriptorPool::VulkanDescriptorPool( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding) + : m_device(device), m_vkDescriptorPool(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList; + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding); + + VulkanDescriptorPoolCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorPool::VulkanDescriptorPool( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1) + : m_device(device), m_vkDescriptorPool(VK_NULL_HANDLE) +{ + VulkanDescriptorSetLayoutBindingList descriptorSetLayoutBindingList; + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding0); + descriptorSetLayoutBindingList.add(descriptorSetLayoutBinding1); + + 
VulkanDescriptorPoolCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorPool::VulkanDescriptorPool( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBindingList &descriptorSetLayoutBindingList) + : m_device(device), m_vkDescriptorPool(VK_NULL_HANDLE) +{ + VulkanDescriptorPoolCommon(descriptorSetLayoutBindingList); +} + +VulkanDescriptorPool::~VulkanDescriptorPool() +{ + if (m_vkDescriptorPool != VK_NULL_HANDLE) + { + vkDestroyDescriptorPool(m_device, m_vkDescriptorPool, NULL); + } +} + +VulkanDescriptorPool::operator VkDescriptorPool() const +{ + return m_vkDescriptorPool; +} + +//////////////////////////////////////// +// VulkanDescriptorSet implementation // +//////////////////////////////////////// + +VulkanDescriptorSet::VulkanDescriptorSet( + const VulkanDescriptorSet &descriptorSet) + : m_device(descriptorSet.m_device), + m_descriptorPool(descriptorSet.m_descriptorPool), + m_vkDescriptorSet(descriptorSet.m_vkDescriptorSet) +{} + +VulkanDescriptorSet::VulkanDescriptorSet( + const VulkanDevice &device, const VulkanDescriptorPool &descriptorPool, + const VulkanDescriptorSetLayout &descriptorSetLayout) + : m_device(device), m_descriptorPool(descriptorPool), + m_vkDescriptorSet(VK_NULL_HANDLE) +{ + VkDescriptorSetLayout vkDescriptorSetLayout = descriptorSetLayout; + + if ((VkDescriptorPool)m_descriptorPool) + { + VkDescriptorSetAllocateInfo vkDescriptorSetAllocateInfo = {}; + vkDescriptorSetAllocateInfo.sType = + VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + vkDescriptorSetAllocateInfo.pNext = NULL; + vkDescriptorSetAllocateInfo.descriptorPool = descriptorPool; + vkDescriptorSetAllocateInfo.descriptorSetCount = 1; + vkDescriptorSetAllocateInfo.pSetLayouts = &vkDescriptorSetLayout; + + vkAllocateDescriptorSets(m_device, &vkDescriptorSetAllocateInfo, + &m_vkDescriptorSet); + } +} + +VulkanDescriptorSet::~VulkanDescriptorSet() +{ + if ((VkDescriptorPool)m_descriptorPool) + { + vkFreeDescriptorSets(m_device, m_descriptorPool, 1, 
&m_vkDescriptorSet); + } +} + +void VulkanDescriptorSet::update(uint32_t binding, const VulkanBuffer &buffer) +{ + VkDescriptorBufferInfo vkDescriptorBufferInfo = {}; + vkDescriptorBufferInfo.buffer = buffer; + vkDescriptorBufferInfo.offset = 0; + vkDescriptorBufferInfo.range = VK_WHOLE_SIZE; + + VkWriteDescriptorSet vkWriteDescriptorSet = {}; + vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vkWriteDescriptorSet.pNext = NULL; + vkWriteDescriptorSet.dstSet = m_vkDescriptorSet; + vkWriteDescriptorSet.dstBinding = binding; + vkWriteDescriptorSet.dstArrayElement = 0; + vkWriteDescriptorSet.descriptorCount = 1; + vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + vkWriteDescriptorSet.pImageInfo = NULL; + vkWriteDescriptorSet.pBufferInfo = &vkDescriptorBufferInfo; + vkWriteDescriptorSet.pTexelBufferView = NULL; + + vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); +} + +void VulkanDescriptorSet::update(uint32_t binding, + const VulkanImageView &imageView) +{ + VkDescriptorImageInfo vkDescriptorImageInfo = {}; + vkDescriptorImageInfo.sampler = VK_NULL_HANDLE; + vkDescriptorImageInfo.imageView = imageView; + vkDescriptorImageInfo.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + + VkWriteDescriptorSet vkWriteDescriptorSet = {}; + vkWriteDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + vkWriteDescriptorSet.pNext = NULL; + vkWriteDescriptorSet.dstSet = m_vkDescriptorSet; + vkWriteDescriptorSet.dstBinding = binding; + vkWriteDescriptorSet.dstArrayElement = 0; + vkWriteDescriptorSet.descriptorCount = 1; + vkWriteDescriptorSet.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + vkWriteDescriptorSet.pImageInfo = &vkDescriptorImageInfo; + vkWriteDescriptorSet.pBufferInfo = NULL; + vkWriteDescriptorSet.pTexelBufferView = NULL; + + vkUpdateDescriptorSets(m_device, 1, &vkWriteDescriptorSet, 0, NULL); +} + +VulkanDescriptorSet::operator VkDescriptorSet() const +{ + return m_vkDescriptorSet; +} + 
+/////////////////////////////////// +// VulkanOffset3D implementation // +/////////////////////////////////// + +VulkanOffset3D::VulkanOffset3D(const VulkanOffset3D &offset3D) + : m_vkOffset3D(offset3D.m_vkOffset3D) +{} + +VulkanOffset3D::VulkanOffset3D(uint32_t x, uint32_t y, uint32_t z) +{ + m_vkOffset3D.x = x; + m_vkOffset3D.y = y; + m_vkOffset3D.z = z; +} + +VulkanOffset3D::~VulkanOffset3D() {} + +uint32_t VulkanOffset3D::getX() const { return m_vkOffset3D.x; } + +uint32_t VulkanOffset3D::getY() const { return m_vkOffset3D.y; } + +uint32_t VulkanOffset3D::getZ() const { return m_vkOffset3D.z; } + +VulkanOffset3D::operator VkOffset3D() const { return m_vkOffset3D; } + +/////////////////////////////////// +// VulkanExtent3D implementation // +/////////////////////////////////// + +VulkanExtent3D::VulkanExtent3D(const VulkanExtent3D &extent3D) + : m_vkExtent3D(extent3D.m_vkExtent3D) +{} + +VulkanExtent3D::VulkanExtent3D(uint32_t width, uint32_t height, uint32_t depth) +{ + m_vkExtent3D.width = width; + m_vkExtent3D.height = height; + m_vkExtent3D.depth = depth; +} + +VulkanExtent3D::~VulkanExtent3D() {} + +uint32_t VulkanExtent3D::getWidth() const { return m_vkExtent3D.width; } + +uint32_t VulkanExtent3D::getHeight() const { return m_vkExtent3D.height; } + +uint32_t VulkanExtent3D::getDepth() const { return m_vkExtent3D.depth; } + +VulkanExtent3D::operator VkExtent3D() const { return m_vkExtent3D; } + +////////////////////////////////////// +// VulkanCommandPool implementation // +////////////////////////////////////// + +VulkanCommandPool::VulkanCommandPool(const VulkanCommandPool &commandPool) + : m_device(commandPool.m_device), + m_vkCommandPool(commandPool.m_vkCommandPool) +{} + +VulkanCommandPool::VulkanCommandPool(const VulkanDevice &device, + const VulkanQueueFamily &queueFamily) + : m_device(device) +{ + VkCommandPoolCreateInfo vkCommandPoolCreateInfo = {}; + vkCommandPoolCreateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + 
vkCommandPoolCreateInfo.pNext = NULL; + vkCommandPoolCreateInfo.flags = + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + vkCommandPoolCreateInfo.queueFamilyIndex = queueFamily; + + vkCreateCommandPool(m_device, &vkCommandPoolCreateInfo, NULL, + &m_vkCommandPool); +} + +VulkanCommandPool::~VulkanCommandPool() +{ + vkDestroyCommandPool(m_device, m_vkCommandPool, NULL); +} + +VulkanCommandPool::operator VkCommandPool() const { return m_vkCommandPool; } + +//////////////////////////////////////// +// VulkanCommandBuffer implementation // +//////////////////////////////////////// + +VulkanCommandBuffer::VulkanCommandBuffer( + const VulkanCommandBuffer &commandBuffer) + : m_device(commandBuffer.m_device), + m_commandPool(commandBuffer.m_commandPool), + m_vkCommandBuffer(commandBuffer.m_vkCommandBuffer) +{} + +VulkanCommandBuffer::VulkanCommandBuffer(const VulkanDevice &device, + const VulkanCommandPool &commandPool) + : m_device(device), m_commandPool(commandPool) +{ + VkCommandBufferAllocateInfo vkCommandBufferAllocateInfo = {}; + vkCommandBufferAllocateInfo.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + vkCommandBufferAllocateInfo.pNext = NULL; + vkCommandBufferAllocateInfo.commandPool = commandPool; + vkCommandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + vkCommandBufferAllocateInfo.commandBufferCount = 1; + + vkAllocateCommandBuffers(m_device, &vkCommandBufferAllocateInfo, + &m_vkCommandBuffer); +} + +VulkanCommandBuffer::~VulkanCommandBuffer() +{ + vkFreeCommandBuffers(m_device, m_commandPool, 1, &m_vkCommandBuffer); +} + +void VulkanCommandBuffer::begin() +{ + VkCommandBufferBeginInfo vkCommandBufferBeginInfo = {}; + vkCommandBufferBeginInfo.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + vkCommandBufferBeginInfo.pNext = NULL; + vkCommandBufferBeginInfo.flags = + VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT; + vkCommandBufferBeginInfo.pInheritanceInfo = NULL; + + vkBeginCommandBuffer(m_vkCommandBuffer, 
&vkCommandBufferBeginInfo); +} + +void VulkanCommandBuffer::bindPipeline(const VulkanPipeline &pipeline) +{ + VkPipelineBindPoint vkPipelineBindPoint = + (VkPipelineBindPoint)pipeline.getPipelineBindPoint(); + + vkCmdBindPipeline(m_vkCommandBuffer, vkPipelineBindPoint, pipeline); +} + +void VulkanCommandBuffer::bindDescriptorSets( + const VulkanPipeline &pipeline, const VulkanPipelineLayout &pipelineLayout, + const VulkanDescriptorSet &descriptorSet) +{ + VkPipelineBindPoint vkPipelineBindPoint = + (VkPipelineBindPoint)pipeline.getPipelineBindPoint(); + VkDescriptorSet vkDescriptorSet = descriptorSet; + + vkCmdBindDescriptorSets(m_vkCommandBuffer, vkPipelineBindPoint, + pipelineLayout, 0, 1, &vkDescriptorSet, 0, NULL); +} + +void VulkanCommandBuffer::pipelineBarrier(const VulkanImage2DList &image2DList, + VulkanImageLayout oldImageLayout, + VulkanImageLayout newImageLayout) +{ + std::vector vkImageMemoryBarrierList; + for (size_t i2DIdx = 0; i2DIdx < image2DList.size(); i2DIdx++) + { + VkImageSubresourceRange vkImageSubresourceRange = {}; + vkImageSubresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkImageSubresourceRange.baseMipLevel = 0; + vkImageSubresourceRange.levelCount = VK_REMAINING_MIP_LEVELS; + vkImageSubresourceRange.baseArrayLayer = 0; + vkImageSubresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + + VkImageMemoryBarrier vkImageMemoryBarrier = {}; + vkImageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + vkImageMemoryBarrier.pNext = NULL; + vkImageMemoryBarrier.srcAccessMask = 0; + vkImageMemoryBarrier.dstAccessMask = 0; + vkImageMemoryBarrier.oldLayout = (VkImageLayout)oldImageLayout; + vkImageMemoryBarrier.newLayout = (VkImageLayout)newImageLayout; + vkImageMemoryBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vkImageMemoryBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + vkImageMemoryBarrier.image = image2DList[i2DIdx]; + vkImageMemoryBarrier.subresourceRange = vkImageSubresourceRange; + + 
vkImageMemoryBarrierList.push_back(vkImageMemoryBarrier); + } + + vkCmdPipelineBarrier(m_vkCommandBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, NULL, 0, + NULL, (uint32_t)vkImageMemoryBarrierList.size(), + vkImageMemoryBarrierList.data()); +} + +void VulkanCommandBuffer::dispatch(uint32_t groupCountX, uint32_t groupCountY, + uint32_t groupCountZ) +{ + vkCmdDispatch(m_vkCommandBuffer, groupCountX, groupCountY, groupCountZ); +} + +void VulkanCommandBuffer::fillBuffer(const VulkanBuffer &buffer, uint32_t data, + uint64_t offset, uint64_t size) +{ + vkCmdFillBuffer(m_vkCommandBuffer, buffer, offset, size, data); +} + +void VulkanCommandBuffer::updateBuffer(const VulkanBuffer &buffer, void *pdata, + uint64_t offset, uint64_t size) +{ + vkCmdUpdateBuffer(m_vkCommandBuffer, buffer, offset, size, pdata); +} + +void VulkanCommandBuffer::copyBufferToImage(const VulkanBuffer &buffer, + const VulkanImage &image, + VulkanImageLayout imageLayout) +{ + VkDeviceSize bufferOffset = 0; + + std::vector vkBufferImageCopyList; + for (uint32_t mipLevel = 0; mipLevel < image.getNumMipLevels(); mipLevel++) + { + VulkanExtent3D extent3D = image.getExtent3D(mipLevel); + size_t elementSize = getVulkanFormatElementSize(image.getFormat()); + + VkImageSubresourceLayers vkImageSubresourceLayers = {}; + vkImageSubresourceLayers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkImageSubresourceLayers.mipLevel = mipLevel; + vkImageSubresourceLayers.baseArrayLayer = 0; + vkImageSubresourceLayers.layerCount = image.getNumLayers(); + + VkBufferImageCopy vkBufferImageCopy = {}; + vkBufferImageCopy.bufferOffset = bufferOffset; + vkBufferImageCopy.bufferRowLength = 0; + vkBufferImageCopy.bufferImageHeight = 0; + vkBufferImageCopy.imageSubresource = vkImageSubresourceLayers; + vkBufferImageCopy.imageOffset = VulkanOffset3D(0, 0, 0); + vkBufferImageCopy.imageExtent = extent3D; + + vkBufferImageCopyList.push_back(vkBufferImageCopy); + + bufferOffset += extent3D.getWidth() 
* extent3D.getHeight() + * extent3D.getDepth() * elementSize; + bufferOffset = + ROUND_UP(bufferOffset, + std::max(elementSize, + (size_t)VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT)); + } + + vkCmdCopyBufferToImage( + m_vkCommandBuffer, buffer, image, (VkImageLayout)imageLayout, + (uint32_t)vkBufferImageCopyList.size(), vkBufferImageCopyList.data()); +} + +void VulkanCommandBuffer::copyBufferToImage( + const VulkanBuffer &buffer, const VulkanImage &image, uint64_t bufferOffset, + uint32_t mipLevel, uint32_t baseArrayLayer, uint32_t layerCount, + VulkanOffset3D offset3D, VulkanExtent3D extent3D) +{ + VkImageSubresourceLayers vkImageSubresourceLayers = {}; + vkImageSubresourceLayers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkImageSubresourceLayers.mipLevel = mipLevel; + vkImageSubresourceLayers.baseArrayLayer = baseArrayLayer; + vkImageSubresourceLayers.layerCount = layerCount; + + VkExtent3D vkExtent3D = extent3D; + if ((extent3D.getWidth() == 0) && (extent3D.getHeight() == 0) + && (extent3D.getDepth() == 0)) + { + vkExtent3D = image.getExtent3D(mipLevel); + } + + VkBufferImageCopy vkBufferImageCopy = {}; + vkBufferImageCopy.bufferOffset = bufferOffset; + vkBufferImageCopy.bufferRowLength = 0; + vkBufferImageCopy.bufferImageHeight = 0; + vkBufferImageCopy.imageSubresource = vkImageSubresourceLayers; + vkBufferImageCopy.imageOffset = offset3D; + vkBufferImageCopy.imageExtent = vkExtent3D; + + vkCmdCopyBufferToImage(m_vkCommandBuffer, buffer, image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, + &vkBufferImageCopy); +} + +void VulkanCommandBuffer::copyImageToBuffer( + const VulkanImage &image, const VulkanBuffer &buffer, uint64_t bufferOffset, + uint32_t mipLevel, uint32_t baseArrayLayer, uint32_t layerCount, + VulkanOffset3D offset3D, VulkanExtent3D extent3D) +{ + VkImageSubresourceLayers vkImageSubresourceLayers = {}; + vkImageSubresourceLayers.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkImageSubresourceLayers.mipLevel = mipLevel; + 
vkImageSubresourceLayers.baseArrayLayer = baseArrayLayer; + vkImageSubresourceLayers.layerCount = layerCount; + + VkExtent3D vkExtent3D = extent3D; + if ((extent3D.getWidth() == 0) && (extent3D.getHeight() == 0) + && (extent3D.getDepth() == 0)) + { + vkExtent3D = image.getExtent3D(mipLevel); + } + + VkBufferImageCopy vkBufferImageCopy = {}; + vkBufferImageCopy.bufferOffset = bufferOffset; + vkBufferImageCopy.bufferRowLength = 0; + vkBufferImageCopy.bufferImageHeight = 0; + vkBufferImageCopy.imageSubresource = vkImageSubresourceLayers; + vkBufferImageCopy.imageOffset = offset3D; + vkBufferImageCopy.imageExtent = vkExtent3D; + + vkCmdCopyImageToBuffer(m_vkCommandBuffer, image, VK_IMAGE_LAYOUT_GENERAL, + buffer, 1, &vkBufferImageCopy); +} + +void VulkanCommandBuffer::end() { vkEndCommandBuffer(m_vkCommandBuffer); } + +VulkanCommandBuffer::operator VkCommandBuffer() const +{ + return m_vkCommandBuffer; +} + +///////////////////////////////// +// VulkanBuffer implementation // +///////////////////////////////// + +VulkanBuffer::VulkanBuffer(const VulkanBuffer &buffer) + : m_device(buffer.m_device), m_vkBuffer(buffer.m_vkBuffer), + m_size(buffer.m_size), m_alignment(buffer.m_alignment), + m_memoryTypeList(buffer.m_memoryTypeList) +{} + +VulkanBuffer::VulkanBuffer( + const VulkanDevice &device, uint64_t size, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanBufferUsage bufferUsage, VulkanSharingMode sharingMode, + const VulkanQueueFamilyList &queueFamilyList) + : m_device(device), m_vkBuffer(VK_NULL_HANDLE) +{ + std::vector queueFamilyIndexList; + if (queueFamilyList.size() == 0) + { + for (size_t qfIdx = 0; + qfIdx < device.getPhysicalDevice().getQueueFamilyList().size(); + qfIdx++) + { + queueFamilyIndexList.push_back( + device.getPhysicalDevice().getQueueFamilyList()[qfIdx]); + } + } + else + { + for (size_t qfIdx = 0; qfIdx < queueFamilyList.size(); qfIdx++) + { + queueFamilyIndexList.push_back(queueFamilyList[qfIdx]); + } + } + + VkBufferCreateInfo 
vkBufferCreateInfo = {}; + vkBufferCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + vkBufferCreateInfo.pNext = NULL; + vkBufferCreateInfo.flags = 0; + vkBufferCreateInfo.size = (VkDeviceSize)size; + vkBufferCreateInfo.usage = (VkBufferUsageFlags)bufferUsage; + vkBufferCreateInfo.sharingMode = (VkSharingMode)sharingMode; + vkBufferCreateInfo.queueFamilyIndexCount = + (uint32_t)queueFamilyIndexList.size(); + vkBufferCreateInfo.pQueueFamilyIndices = queueFamilyIndexList.data(); + + VkExternalMemoryBufferCreateInfo vkExternalMemoryBufferCreateInfo = {}; + if (externalMemoryHandleType != VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE) + { + vkExternalMemoryBufferCreateInfo.sType = + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHR; + vkExternalMemoryBufferCreateInfo.pNext = NULL; + vkExternalMemoryBufferCreateInfo.handleTypes = + (VkExternalMemoryHandleTypeFlags)externalMemoryHandleType; + + vkBufferCreateInfo.pNext = &vkExternalMemoryBufferCreateInfo; + } + + vkCreateBuffer(m_device, &vkBufferCreateInfo, NULL, &m_vkBuffer); + + VkMemoryRequirements vkMemoryRequirements = {}; + vkGetBufferMemoryRequirements(m_device, m_vkBuffer, &vkMemoryRequirements); + m_size = vkMemoryRequirements.size; + m_alignment = vkMemoryRequirements.alignment; + const VulkanMemoryTypeList &memoryTypeList = + m_device.getPhysicalDevice().getMemoryTypeList(); + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + uint32_t memoryTypeIndex = memoryTypeList[mtIdx]; + if ((1 << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits) + { + m_memoryTypeList.add(memoryTypeList[mtIdx]); + } + } +} + +VulkanBuffer::~VulkanBuffer() { vkDestroyBuffer(m_device, m_vkBuffer, NULL); } + +uint64_t VulkanBuffer::getSize() const { return m_size; } + +uint64_t VulkanBuffer::getAlignment() const { return m_alignment; } + +const VulkanMemoryTypeList &VulkanBuffer::getMemoryTypeList() const +{ + return m_memoryTypeList; +} + +VulkanBuffer::operator VkBuffer() const { return m_vkBuffer; } + 
+//////////////////////////////// +// VulkanImage implementation // +//////////////////////////////// + +VulkanImage::VulkanImage(const VulkanImage &image) + : m_device(image.m_device), m_imageType(image.m_imageType), + m_extent3D(image.m_extent3D), m_format(image.m_format), + m_numMipLevels(image.m_numMipLevels), m_numLayers(image.m_numLayers), + m_vkImage(image.m_vkImage), m_size(image.m_size), + m_alignment(image.m_alignment), m_memoryTypeList(image.m_memoryTypeList) +{} + +VulkanImage::VulkanImage( + const VulkanDevice &device, VulkanImageType imageType, VulkanFormat format, + const VulkanExtent3D &extent3D, uint32_t numMipLevels, uint32_t arrayLayers, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanImageCreateFlag imageCreateFlag, VulkanImageTiling imageTiling, + VulkanImageUsage imageUsage, VulkanSharingMode sharingMode) + : m_device(device), m_imageType(imageType), m_extent3D(extent3D), + m_format(format), m_numMipLevels(numMipLevels), m_numLayers(arrayLayers), + m_vkImage(VK_NULL_HANDLE) +{ + VkImageCreateInfo vkImageCreateInfo = {}; + vkImageCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + vkImageCreateInfo.pNext = NULL; + vkImageCreateInfo.flags = (VkImageCreateFlags)imageCreateFlag; + vkImageCreateInfo.imageType = (VkImageType)imageType; + vkImageCreateInfo.format = (VkFormat)format; + vkImageCreateInfo.extent = extent3D; + vkImageCreateInfo.mipLevels = numMipLevels; + vkImageCreateInfo.arrayLayers = arrayLayers; + vkImageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT; + vkImageCreateInfo.tiling = (VkImageTiling)imageTiling; + vkImageCreateInfo.usage = (VkImageUsageFlags)imageUsage; + vkImageCreateInfo.sharingMode = (VkSharingMode)sharingMode; + vkImageCreateInfo.queueFamilyIndexCount = + (uint32_t)m_device.getPhysicalDevice().getQueueFamilyList().size(); + vkImageCreateInfo.pQueueFamilyIndices = + m_device.getPhysicalDevice().getQueueFamilyList(); + vkImageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + 
VkExternalMemoryImageCreateInfo vkExternalMemoryImageCreateInfo = {}; + if (externalMemoryHandleType != VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE) + { + vkExternalMemoryImageCreateInfo.sType = + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; + vkExternalMemoryImageCreateInfo.pNext = NULL; + vkExternalMemoryImageCreateInfo.handleTypes = + (VkExternalMemoryHandleTypeFlags)externalMemoryHandleType; + + vkImageCreateInfo.pNext = &vkExternalMemoryImageCreateInfo; + } + + vkCreateImage(m_device, &vkImageCreateInfo, NULL, &m_vkImage); + VulkanImageCreateInfo = vkImageCreateInfo; + VkMemoryRequirements vkMemoryRequirements = {}; + vkGetImageMemoryRequirements(m_device, m_vkImage, &vkMemoryRequirements); + m_size = vkMemoryRequirements.size; + m_alignment = vkMemoryRequirements.alignment; + const VulkanMemoryTypeList &memoryTypeList = + m_device.getPhysicalDevice().getMemoryTypeList(); + for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) + { + uint32_t memoryTypeIndex = memoryTypeList[mtIdx]; + if ((1 << memoryTypeIndex) & vkMemoryRequirements.memoryTypeBits) + { + m_memoryTypeList.add(memoryTypeList[mtIdx]); + } + } +} + +VulkanImage::~VulkanImage() { vkDestroyImage(m_device, m_vkImage, NULL); } + +VulkanExtent3D VulkanImage::getExtent3D(uint32_t mipLevel) const +{ + return VulkanExtent3D(0, 0, 0); +} + +VulkanFormat VulkanImage::getFormat() const { return m_format; } + +VkImageCreateInfo VulkanImage::getVkImageCreateInfo() const +{ + return VulkanImageCreateInfo; +} + +uint32_t VulkanImage::getNumMipLevels() const { return m_numMipLevels; } + +uint32_t VulkanImage::getNumLayers() const { return m_numLayers; } + +uint64_t VulkanImage::getSize() const { return m_size; } + +uint64_t VulkanImage::getAlignment() const { return m_alignment; } + +const VulkanMemoryTypeList &VulkanImage::getMemoryTypeList() const +{ + return m_memoryTypeList; +} + +VulkanImage::operator VkImage() const { return m_vkImage; } + +////////////////////////////////// +// VulkanImage2D 
implementation // +////////////////////////////////// + +VulkanImage2D::VulkanImage2D(const VulkanImage2D &image2D): VulkanImage(image2D) +{} + +VulkanImage2D::VulkanImage2D( + const VulkanDevice &device, VulkanFormat format, uint32_t width, + uint32_t height, uint32_t numMipLevels, + VulkanExternalMemoryHandleType externalMemoryHandleType, + VulkanImageCreateFlag imageCreateFlag, VulkanImageUsage imageUsage, + VulkanSharingMode sharingMode) + : VulkanImage(device, VULKAN_IMAGE_TYPE_2D, format, + VulkanExtent3D(width, height, 1), numMipLevels, 1, + externalMemoryHandleType, imageCreateFlag, + VULKAN_IMAGE_TILING_OPTIMAL, imageUsage, sharingMode) +{} + +VulkanImage2D::~VulkanImage2D() {} + +VulkanExtent3D VulkanImage2D::getExtent3D(uint32_t mipLevel) const +{ + uint32_t width = std::max(m_extent3D.getWidth() >> mipLevel, uint32_t(1)); + uint32_t height = std::max(m_extent3D.getHeight() >> mipLevel, uint32_t(1)); + uint32_t depth = 1; + + return VulkanExtent3D(width, height, depth); +} + +//////////////////////////////////// +// VulkanImageView implementation // +//////////////////////////////////// + +VulkanImageView::VulkanImageView(const VulkanImageView &imageView) + : m_device(imageView.m_device), m_vkImageView(imageView.m_vkImageView) +{} + +VulkanImageView::VulkanImageView(const VulkanDevice &device, + const VulkanImage &image, + VulkanImageViewType imageViewType, + uint32_t baseMipLevel, uint32_t levelCount, + uint32_t baseArrayLayer, uint32_t layerCount) + : m_device(device), m_vkImageView(VK_NULL_HANDLE) +{ + VkComponentMapping vkComponentMapping = {}; + vkComponentMapping.r = VK_COMPONENT_SWIZZLE_IDENTITY; + vkComponentMapping.g = VK_COMPONENT_SWIZZLE_IDENTITY; + vkComponentMapping.b = VK_COMPONENT_SWIZZLE_IDENTITY; + vkComponentMapping.a = VK_COMPONENT_SWIZZLE_IDENTITY; + + VkImageSubresourceRange vkImageSubresourceRange = {}; + vkImageSubresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vkImageSubresourceRange.baseMipLevel = baseMipLevel; + 
vkImageSubresourceRange.levelCount = levelCount; + vkImageSubresourceRange.baseArrayLayer = baseArrayLayer; + vkImageSubresourceRange.layerCount = layerCount; + + VkImageViewCreateInfo vkImageViewCreateInfo = {}; + vkImageViewCreateInfo.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO; + vkImageViewCreateInfo.pNext = NULL; + vkImageViewCreateInfo.flags = 0; + vkImageViewCreateInfo.image = image; + vkImageViewCreateInfo.viewType = (VkImageViewType)imageViewType; + vkImageViewCreateInfo.format = (VkFormat)image.getFormat(); + vkImageViewCreateInfo.components = vkComponentMapping; + vkImageViewCreateInfo.subresourceRange = vkImageSubresourceRange; + + vkCreateImageView(m_device, &vkImageViewCreateInfo, NULL, &m_vkImageView); +} + +VulkanImageView::~VulkanImageView() +{ + vkDestroyImageView(m_device, m_vkImageView, NULL); +} + +VulkanImageView::operator VkImageView() const { return m_vkImageView; } + +/////////////////////////////////////// +// VulkanDeviceMemory implementation // +/////////////////////////////////////// + +#if defined(_WIN32) || defined(_WIN64) + +class WindowsSecurityAttributes { +protected: + SECURITY_ATTRIBUTES m_winSecurityAttributes; + PSECURITY_DESCRIPTOR m_winPSecurityDescriptor; + +public: + WindowsSecurityAttributes(); + SECURITY_ATTRIBUTES *operator&(); + ~WindowsSecurityAttributes(); +}; + + +WindowsSecurityAttributes::WindowsSecurityAttributes() +{ + m_winPSecurityDescriptor = (PSECURITY_DESCRIPTOR)calloc( + 1, SECURITY_DESCRIPTOR_MIN_LENGTH + 2 * sizeof(void **)); + // CHECK_NEQ(m_winPSecurityDescriptor, (PSECURITY_DESCRIPTOR)NULL); + PSID *ppSID = (PSID *)((PBYTE)m_winPSecurityDescriptor + + SECURITY_DESCRIPTOR_MIN_LENGTH); + PACL *ppACL = (PACL *)((PBYTE)ppSID + sizeof(PSID *)); + InitializeSecurityDescriptor(m_winPSecurityDescriptor, + SECURITY_DESCRIPTOR_REVISION); + SID_IDENTIFIER_AUTHORITY sidIdentifierAuthority = + SECURITY_WORLD_SID_AUTHORITY; + AllocateAndInitializeSid(&sidIdentifierAuthority, 1, SECURITY_WORLD_RID, 0, + 0, 0, 0, 
0, 0, 0, ppSID); + EXPLICIT_ACCESS explicitAccess; + ZeroMemory(&explicitAccess, sizeof(EXPLICIT_ACCESS)); + explicitAccess.grfAccessPermissions = + STANDARD_RIGHTS_ALL | SPECIFIC_RIGHTS_ALL; + explicitAccess.grfAccessMode = SET_ACCESS; + explicitAccess.grfInheritance = INHERIT_ONLY; + explicitAccess.Trustee.TrusteeForm = TRUSTEE_IS_SID; + explicitAccess.Trustee.TrusteeType = TRUSTEE_IS_WELL_KNOWN_GROUP; + explicitAccess.Trustee.ptstrName = (LPTSTR)*ppSID; + SetEntriesInAcl(1, &explicitAccess, NULL, ppACL); + SetSecurityDescriptorDacl(m_winPSecurityDescriptor, TRUE, *ppACL, FALSE); + m_winSecurityAttributes.nLength = sizeof(m_winSecurityAttributes); + m_winSecurityAttributes.lpSecurityDescriptor = m_winPSecurityDescriptor; + m_winSecurityAttributes.bInheritHandle = TRUE; +} + +SECURITY_ATTRIBUTES *WindowsSecurityAttributes::operator&() +{ + return &m_winSecurityAttributes; +} + +WindowsSecurityAttributes::~WindowsSecurityAttributes() +{ + PSID *ppSID = (PSID *)((PBYTE)m_winPSecurityDescriptor + + SECURITY_DESCRIPTOR_MIN_LENGTH); + PACL *ppACL = (PACL *)((PBYTE)ppSID + sizeof(PSID *)); + if (*ppSID) + { + FreeSid(*ppSID); + } + if (*ppACL) + { + LocalFree(*ppACL); + } + free(m_winPSecurityDescriptor); +} + +#endif + +VulkanDeviceMemory::VulkanDeviceMemory(const VulkanDeviceMemory &deviceMemory) + : m_device(deviceMemory.m_device), + m_vkDeviceMemory(deviceMemory.m_vkDeviceMemory), + m_size(deviceMemory.m_size), m_isDedicated(deviceMemory.m_isDedicated) +{} + +VulkanDeviceMemory::VulkanDeviceMemory( + const VulkanDevice &device, uint64_t size, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name) + : m_device(device), m_size(size), m_isDedicated(false) +{ +#if defined(_WIN32) || defined(_WIN64) + WindowsSecurityAttributes winSecurityAttributes; + + VkExportMemoryWin32HandleInfoKHR vkExportMemoryWin32HandleInfoKHR = {}; + vkExportMemoryWin32HandleInfoKHR.sType = + 
VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR; + vkExportMemoryWin32HandleInfoKHR.pNext = NULL; + vkExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; + vkExportMemoryWin32HandleInfoKHR.dwAccess = + DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; + vkExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)name; + +#endif + + VkExportMemoryAllocateInfoKHR vkExportMemoryAllocateInfoKHR = {}; + vkExportMemoryAllocateInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR; +#if defined(_WIN32) || defined(_WIN64) + vkExportMemoryAllocateInfoKHR.pNext = externalMemoryHandleType + & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT + ? &vkExportMemoryWin32HandleInfoKHR + : NULL; +#else + vkExportMemoryAllocateInfoKHR.pNext = NULL; +#endif + vkExportMemoryAllocateInfoKHR.handleTypes = + (VkExternalMemoryHandleTypeFlagsKHR)externalMemoryHandleType; + + VkMemoryAllocateInfo vkMemoryAllocateInfo = {}; + vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + vkMemoryAllocateInfo.pNext = + externalMemoryHandleType ? 
&vkExportMemoryAllocateInfoKHR : NULL; + vkMemoryAllocateInfo.allocationSize = m_size; + vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType; + + vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL, &m_vkDeviceMemory); +} + +VulkanDeviceMemory::VulkanDeviceMemory( + const VulkanDevice &device, const VulkanImage &image, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType, const void *name) + : m_device(device), m_size(image.getSize()), m_isDedicated(true) +{ +#if defined(_WIN32) || defined(_WIN64) + WindowsSecurityAttributes winSecurityAttributes; + + VkExportMemoryWin32HandleInfoKHR vkExportMemoryWin32HandleInfoKHR = {}; + vkExportMemoryWin32HandleInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_KHR; + vkExportMemoryWin32HandleInfoKHR.pNext = NULL; + vkExportMemoryWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; + vkExportMemoryWin32HandleInfoKHR.dwAccess = + DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; + vkExportMemoryWin32HandleInfoKHR.name = (LPCWSTR)name; + +#endif + + VkExportMemoryAllocateInfoKHR vkExportMemoryAllocateInfoKHR = {}; + vkExportMemoryAllocateInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHR; +#if defined(_WIN32) || defined(_WIN64) + vkExportMemoryAllocateInfoKHR.pNext = externalMemoryHandleType + & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT + ? &vkExportMemoryWin32HandleInfoKHR + : NULL; +#else + vkExportMemoryAllocateInfoKHR.pNext = NULL; +#endif + vkExportMemoryAllocateInfoKHR.handleTypes = + (VkExternalMemoryHandleTypeFlagsKHR)externalMemoryHandleType; + + VkMemoryDedicatedAllocateInfo vkMemoryDedicatedAllocateInfo = {}; + vkMemoryDedicatedAllocateInfo.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; + vkMemoryDedicatedAllocateInfo.pNext = + externalMemoryHandleType ? 
&vkExportMemoryAllocateInfoKHR : NULL; + vkMemoryDedicatedAllocateInfo.image = image; + vkMemoryDedicatedAllocateInfo.buffer = VK_NULL_HANDLE; + + VkMemoryAllocateInfo vkMemoryAllocateInfo = {}; + vkMemoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + vkMemoryAllocateInfo.pNext = &vkMemoryDedicatedAllocateInfo; + vkMemoryAllocateInfo.allocationSize = m_size; + vkMemoryAllocateInfo.memoryTypeIndex = (uint32_t)memoryType; + + vkAllocateMemory(m_device, &vkMemoryAllocateInfo, NULL, &m_vkDeviceMemory); +} + +VulkanDeviceMemory::~VulkanDeviceMemory() +{ + vkFreeMemory(m_device, m_vkDeviceMemory, NULL); +} + +uint64_t VulkanDeviceMemory::getSize() const { return m_size; } + +#ifdef _WIN32 +HANDLE VulkanDeviceMemory::getHandle( + VulkanExternalMemoryHandleType externalMemoryHandleType) const +{ + HANDLE handle; + + VkMemoryGetWin32HandleInfoKHR vkMemoryGetWin32HandleInfoKHR = {}; + vkMemoryGetWin32HandleInfoKHR.sType = + VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + vkMemoryGetWin32HandleInfoKHR.pNext = NULL; + vkMemoryGetWin32HandleInfoKHR.memory = m_vkDeviceMemory; + vkMemoryGetWin32HandleInfoKHR.handleType = + (VkExternalMemoryHandleTypeFlagBitsKHR)externalMemoryHandleType; + + vkGetMemoryWin32HandleKHR(m_device, &vkMemoryGetWin32HandleInfoKHR, + &handle); + + return handle; +} +#else +int VulkanDeviceMemory::getHandle( + VulkanExternalMemoryHandleType externalMemoryHandleType) const +{ + if (externalMemoryHandleType + == VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD) + { + int fd; + + VkMemoryGetFdInfoKHR vkMemoryGetFdInfoKHR = {}; + vkMemoryGetFdInfoKHR.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + vkMemoryGetFdInfoKHR.pNext = NULL; + vkMemoryGetFdInfoKHR.memory = m_vkDeviceMemory; + vkMemoryGetFdInfoKHR.handleType = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + + vkGetMemoryFdKHR(m_device, &vkMemoryGetFdInfoKHR, &fd); + + return fd; + } + return HANDLE_ERROR; +} +#endif + +bool VulkanDeviceMemory::isDedicated() const { return 
m_isDedicated; } + +void *VulkanDeviceMemory::map(size_t offset, size_t size) +{ + void *pData; + + vkMapMemory(m_device, m_vkDeviceMemory, (VkDeviceSize)offset, + (VkDeviceSize)size, 0, &pData); + + return pData; +} + +void VulkanDeviceMemory::unmap() { vkUnmapMemory(m_device, m_vkDeviceMemory); } + +void VulkanDeviceMemory::bindBuffer(const VulkanBuffer &buffer, uint64_t offset) +{ + vkBindBufferMemory(m_device, buffer, m_vkDeviceMemory, offset); +} + +void VulkanDeviceMemory::bindImage(const VulkanImage &image, uint64_t offset) +{ + vkBindImageMemory(m_device, image, m_vkDeviceMemory, offset); +} + +VulkanDeviceMemory::operator VkDeviceMemory() const { return m_vkDeviceMemory; } + +//////////////////////////////////// +// VulkanSemaphore implementation // +//////////////////////////////////// + +VulkanSemaphore::VulkanSemaphore(const VulkanSemaphore &semaphore) + : m_device(semaphore.m_device), m_vkSemaphore(semaphore.m_vkSemaphore) +{} + +VulkanSemaphore::VulkanSemaphore( + const VulkanDevice &device, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType, + const std::wstring name) + : m_device(device), m_name(name) +{ +#if defined(_WIN32) || defined(_WIN64) + WindowsSecurityAttributes winSecurityAttributes; + + VkExportSemaphoreWin32HandleInfoKHR + vkExportSemaphoreWin32HandleInfoKHR = {}; + vkExportSemaphoreWin32HandleInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_WIN32_HANDLE_INFO_KHR; + vkExportSemaphoreWin32HandleInfoKHR.pNext = NULL; + vkExportSemaphoreWin32HandleInfoKHR.pAttributes = &winSecurityAttributes; + vkExportSemaphoreWin32HandleInfoKHR.dwAccess = + DXGI_SHARED_RESOURCE_READ | DXGI_SHARED_RESOURCE_WRITE; + vkExportSemaphoreWin32HandleInfoKHR.name = + m_name.size() ? 
(LPCWSTR)m_name.c_str() : NULL; +#endif + + VkExportSemaphoreCreateInfoKHR vkExportSemaphoreCreateInfoKHR = {}; + vkExportSemaphoreCreateInfoKHR.sType = + VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO_KHR; +#if defined(_WIN32) || defined(_WIN64) + vkExportSemaphoreCreateInfoKHR.pNext = + (externalSemaphoreHandleType + & VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT) + ? &vkExportSemaphoreWin32HandleInfoKHR + : NULL; +#else + vkExportSemaphoreCreateInfoKHR.pNext = NULL; +#endif + vkExportSemaphoreCreateInfoKHR.handleTypes = + (VkExternalSemaphoreHandleTypeFlagsKHR)externalSemaphoreHandleType; + + VkSemaphoreCreateInfo vkSemaphoreCreateInfo = {}; + vkSemaphoreCreateInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + vkSemaphoreCreateInfo.pNext = + (externalSemaphoreHandleType + != VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE) + ? &vkExportSemaphoreCreateInfoKHR + : NULL; + vkSemaphoreCreateInfo.flags = 0; + + vkCreateSemaphore(m_device, &vkSemaphoreCreateInfo, NULL, &m_vkSemaphore); +} + +VulkanSemaphore::~VulkanSemaphore() +{ + vkDestroySemaphore(m_device, m_vkSemaphore, NULL); +} + +#if defined(_WIN32) || defined(_WIN64) +HANDLE VulkanSemaphore::getHandle( + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const +{ + HANDLE handle; + + VkSemaphoreGetWin32HandleInfoKHR vkSemaphoreGetWin32HandleInfoKHR = {}; + vkSemaphoreGetWin32HandleInfoKHR.sType = + VK_STRUCTURE_TYPE_SEMAPHORE_GET_WIN32_HANDLE_INFO_KHR; + vkSemaphoreGetWin32HandleInfoKHR.pNext = NULL; + vkSemaphoreGetWin32HandleInfoKHR.semaphore = m_vkSemaphore; + vkSemaphoreGetWin32HandleInfoKHR.handleType = + (VkExternalSemaphoreHandleTypeFlagBitsKHR)externalSemaphoreHandleType; + + vkGetSemaphoreWin32HandleKHR(m_device, &vkSemaphoreGetWin32HandleInfoKHR, + &handle); + + return handle; +} +#else +int VulkanSemaphore::getHandle( + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const +{ + if (externalSemaphoreHandleType + == VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD) + { 
+ int fd; + + VkSemaphoreGetFdInfoKHR vkSemaphoreGetFdInfoKHR = {}; + vkSemaphoreGetFdInfoKHR.sType = + VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR; + vkSemaphoreGetFdInfoKHR.pNext = NULL; + vkSemaphoreGetFdInfoKHR.semaphore = m_vkSemaphore; + vkSemaphoreGetFdInfoKHR.handleType = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + + vkGetSemaphoreFdKHR(m_device, &vkSemaphoreGetFdInfoKHR, &fd); + + return fd; + } + return HANDLE_ERROR; +} +#endif + +const std::wstring &VulkanSemaphore::getName() const { return m_name; } + +VulkanSemaphore::operator VkSemaphore() const { return m_vkSemaphore; } diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp new file mode 100644 index 00000000..1f68a92b --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp @@ -0,0 +1,579 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef _vulkan_wrapper_hpp_ +#define _vulkan_wrapper_hpp_ + +#include +#include "vulkan_wrapper_types.hpp" +#include "vulkan_list_map.hpp" +#include "vulkan_api_list.hpp" + +class VulkanInstance { + friend const VulkanInstance &getVulkanInstance(); + +protected: + VkInstance m_vkInstance; + VulkanPhysicalDeviceList m_physicalDeviceList; + + VulkanInstance(); + VulkanInstance(const VulkanInstance &); + virtual ~VulkanInstance(); + +public: + const VulkanPhysicalDeviceList &getPhysicalDeviceList() const; + operator VkInstance() const; +}; + +class VulkanPhysicalDevice { + friend class VulkanInstance; + +protected: + VkPhysicalDevice m_vkPhysicalDevice; + VkPhysicalDeviceProperties m_vkPhysicalDeviceProperties; + uint8_t m_vkDeviceUUID[VK_UUID_SIZE]; + uint8_t m_vkDeviceLUID[VK_LUID_SIZE]; + uint32_t m_vkDeviceNodeMask; + VkPhysicalDeviceFeatures m_vkPhysicalDeviceFeatures; + VkPhysicalDeviceMemoryProperties m_vkPhysicalDeviceMemoryProperties; + VulkanQueueFamilyList m_queueFamilyList; + VulkanMemoryHeapList m_memoryHeapList; + VulkanMemoryTypeList m_memoryTypeList; + + VulkanPhysicalDevice(const VulkanPhysicalDevice &physicalDevice); + VulkanPhysicalDevice(VkPhysicalDevice vkPhysicalDevice); + virtual ~VulkanPhysicalDevice(); + +public: + const VulkanQueueFamilyList &getQueueFamilyList() const; + const VulkanMemoryHeapList &getMemoryHeapList() const; + const VulkanMemoryTypeList &getMemoryTypeList() const; + const uint8_t *getUUID() const; + const uint8_t *getLUID() const; + uint32_t getNodeMask() const; + operator VkPhysicalDevice() const; +}; + +class VulkanMemoryHeap { + friend class VulkanPhysicalDevice; + +protected: + uint32_t m_memoryHeapIndex; + uint64_t m_size; + VulkanMemoryHeapFlag m_memoryHeapFlag; + + VulkanMemoryHeap(const VulkanMemoryHeap &memoryHeap); + VulkanMemoryHeap(uint32_t m_memoryHeapIndex, uint64_t m_size, + VulkanMemoryHeapFlag m_memoryHeapFlag); + virtual ~VulkanMemoryHeap(); + +public: + uint64_t getSize() const; + 
VulkanMemoryHeapFlag getMemoryHeapFlag() const; + operator uint32_t() const; +}; + +class VulkanMemoryType { + friend class VulkanPhysicalDevice; + +protected: + uint32_t m_memoryTypeIndex; + const VulkanMemoryTypeProperty m_memoryTypeProperty; + const VulkanMemoryHeap &m_memoryHeap; + + VulkanMemoryType(const VulkanMemoryType &memoryType); + VulkanMemoryType(uint32_t memoryTypeIndex, + VulkanMemoryTypeProperty memoryTypeProperty, + const VulkanMemoryHeap &memoryHeap); + virtual ~VulkanMemoryType(); + +public: + VulkanMemoryTypeProperty getMemoryTypeProperty() const; + const VulkanMemoryHeap &getMemoryHeap() const; + operator uint32_t() const; +}; + +class VulkanQueueFamily { + friend class VulkanPhysicalDevice; + +protected: + uint32_t m_queueFamilyIndex; + VkQueueFamilyProperties m_vkQueueFamilyProperties; + + VulkanQueueFamily(const VulkanQueueFamily &queueFamily); + VulkanQueueFamily(uint32_t queueFamilyIndex, + VkQueueFamilyProperties vkQueueFamilyProperties); + virtual ~VulkanQueueFamily(); + +public: + uint32_t getQueueFlags() const; + uint32_t getQueueCount() const; + operator uint32_t() const; +}; + +class VulkanDevice { +protected: + const VulkanPhysicalDevice &m_physicalDevice; + VkDevice m_vkDevice; + VulkanQueueFamilyToQueueListMap m_queueFamilyIndexToQueueListMap; + + VulkanDevice(const VulkanDevice &device); + +public: + VulkanDevice( + const VulkanPhysicalDevice &physicalDevice = getVulkanPhysicalDevice(), + const VulkanQueueFamilyToQueueCountMap &queueFamilyToQueueCountMap = + getDefaultVulkanQueueFamilyToQueueCountMap()); + virtual ~VulkanDevice(); + const VulkanPhysicalDevice &getPhysicalDevice() const; + VulkanQueue & + getQueue(const VulkanQueueFamily &queueFamily = getVulkanQueueFamily(), + uint32_t queueIndex = 0); + operator VkDevice() const; +}; + +class VulkanQueue { + friend class VulkanDevice; + +protected: + VkQueue m_vkQueue; + + VulkanQueue(VkQueue vkQueue); + VulkanQueue(const VulkanQueue &queue); + virtual ~VulkanQueue(); + +public: 
+ const VulkanQueueFamily &getQueueFamily(); + void submit(const VulkanSemaphoreList &waitSemaphoreList, + const VulkanCommandBufferList &commandBufferList, + const VulkanSemaphoreList &signalSemaphoreList); + void submit(const VulkanSemaphore &waitSemaphore, + const VulkanCommandBuffer &commandBuffer, + const VulkanSemaphore &signalSemaphore); + void submit(const VulkanCommandBuffer &commandBuffer, + const VulkanSemaphore &signalSemaphore); + void submit(const VulkanCommandBuffer &commandBuffer); + void waitIdle(); + operator VkQueue() const; +}; + +class VulkanDescriptorSetLayoutBinding { +protected: + VkDescriptorSetLayoutBinding m_vkDescriptorSetLayoutBinding; + + VulkanDescriptorSetLayoutBinding( + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding); + +public: + VulkanDescriptorSetLayoutBinding( + uint32_t binding, VulkanDescriptorType descriptorType, + uint32_t descriptorCount = 1, + VulkanShaderStage shaderStage = VULKAN_SHADER_STAGE_COMPUTE); + virtual ~VulkanDescriptorSetLayoutBinding(); + operator VkDescriptorSetLayoutBinding() const; +}; + +class VulkanDescriptorSetLayout { +protected: + const VulkanDevice &m_device; + VkDescriptorSetLayout m_vkDescriptorSetLayout; + + VulkanDescriptorSetLayout( + const VulkanDescriptorSetLayout &descriptorSetLayout); + void + VulkanDescriptorSetLayoutCommon(const VulkanDescriptorSetLayoutBindingList + &descriptorSetLayoutBindingList); + +public: + VulkanDescriptorSetLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding); + VulkanDescriptorSetLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1); + VulkanDescriptorSetLayout(const VulkanDevice &device, + const VulkanDescriptorSetLayoutBindingList + &descriptorSetLayoutBindingList); + virtual ~VulkanDescriptorSetLayout(); + operator VkDescriptorSetLayout() const; +}; + +class 
VulkanPipelineLayout { +protected: + const VulkanDevice &m_device; + VkPipelineLayout m_vkPipelineLayout; + + VulkanPipelineLayout(const VulkanPipelineLayout &pipelineLayout); + void VulkanPipelineLayoutCommon( + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList); + +public: + VulkanPipelineLayout(const VulkanDevice &device, + const VulkanDescriptorSetLayout &descriptorSetLayout); + VulkanPipelineLayout( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutList &descriptorSetLayoutList = + getEmptyVulkanDescriptorSetLayoutList()); + virtual ~VulkanPipelineLayout(); + operator VkPipelineLayout() const; +}; + +class VulkanShaderModule { +protected: + const VulkanDevice &m_device; + VkShaderModule m_vkShaderModule; + + VulkanShaderModule(const VulkanShaderModule &shaderModule); + +public: + VulkanShaderModule(const VulkanDevice &device, const std::string &code); + virtual ~VulkanShaderModule(); + operator VkShaderModule() const; +}; + +class VulkanPipeline { +protected: + const VulkanDevice &m_device; + VkPipeline m_vkPipeline; + + VulkanPipeline(const VulkanPipeline &pipeline); + +public: + VulkanPipeline(const VulkanDevice &device); + virtual ~VulkanPipeline(); + virtual VulkanPipelineBindPoint getPipelineBindPoint() const = 0; + operator VkPipeline() const; +}; + +class VulkanComputePipeline : public VulkanPipeline { +protected: + VulkanComputePipeline(const VulkanComputePipeline &computePipeline); + +public: + VulkanComputePipeline(const VulkanDevice &device, + const VulkanPipelineLayout &pipelineLayout, + const VulkanShaderModule &shaderModule, + const std::string &entryFuncName = "main"); + virtual ~VulkanComputePipeline(); + VulkanPipelineBindPoint getPipelineBindPoint() const; +}; + +class VulkanDescriptorPool { +protected: + const VulkanDevice &m_device; + VkDescriptorPool m_vkDescriptorPool; + + VulkanDescriptorPool(const VulkanDescriptorPool &descriptorPool); + void VulkanDescriptorPoolCommon(const VulkanDescriptorSetLayoutBindingList + 
&descriptorSetLayoutBindingList); + +public: + VulkanDescriptorPool( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding); + VulkanDescriptorPool( + const VulkanDevice &device, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding0, + const VulkanDescriptorSetLayoutBinding &descriptorSetLayoutBinding1); + VulkanDescriptorPool(const VulkanDevice &device, + const VulkanDescriptorSetLayoutBindingList + &descriptorSetLayoutBindingList); + virtual ~VulkanDescriptorPool(); + operator VkDescriptorPool() const; +}; + +class VulkanDescriptorSet { +protected: + const VulkanDevice &m_device; + const VulkanDescriptorPool &m_descriptorPool; + VkDescriptorSet m_vkDescriptorSet; + + VulkanDescriptorSet(const VulkanDescriptorSet &descriptorSet); + +public: + VulkanDescriptorSet(const VulkanDevice &device, + const VulkanDescriptorPool &descriptorPool, + const VulkanDescriptorSetLayout &descriptorSetLayout); + virtual ~VulkanDescriptorSet(); + void update(uint32_t binding, const VulkanBuffer &buffer); + void update(uint32_t binding, const VulkanImageView &imageView); + operator VkDescriptorSet() const; +}; + +class VulkanOffset3D { +protected: + VkOffset3D m_vkOffset3D; + +public: + VulkanOffset3D(const VulkanOffset3D &extent3D); + VulkanOffset3D(uint32_t x = 0, uint32_t y = 0, uint32_t z = 0); + virtual ~VulkanOffset3D(); + uint32_t getX() const; + uint32_t getY() const; + uint32_t getZ() const; + operator VkOffset3D() const; +}; + +class VulkanExtent3D { +protected: + VkExtent3D m_vkExtent3D; + +public: + VulkanExtent3D(const VulkanExtent3D &extent3D); + VulkanExtent3D(uint32_t width, uint32_t height = 1, uint32_t depth = 1); + virtual ~VulkanExtent3D(); + uint32_t getWidth() const; + uint32_t getHeight() const; + uint32_t getDepth() const; + operator VkExtent3D() const; +}; + +class VulkanCommandPool { +protected: + const VulkanDevice &m_device; + VkCommandPool m_vkCommandPool; + + VulkanCommandPool(const 
VulkanCommandPool &commandPool); + +public: + VulkanCommandPool( + const VulkanDevice &device, + const VulkanQueueFamily &queueFamily = getVulkanQueueFamily()); + virtual ~VulkanCommandPool(); + operator VkCommandPool() const; +}; + +class VulkanCommandBuffer { +protected: + const VulkanDevice &m_device; + const VulkanCommandPool &m_commandPool; + VkCommandBuffer m_vkCommandBuffer; + + VulkanCommandBuffer(const VulkanCommandBuffer &commandBuffer); + +public: + VulkanCommandBuffer(const VulkanDevice &device, + const VulkanCommandPool &commandPool); + virtual ~VulkanCommandBuffer(); + void begin(); + void bindPipeline(const VulkanPipeline &pipeline); + void bindDescriptorSets(const VulkanPipeline &pipeline, + const VulkanPipelineLayout &pipelineLayout, + const VulkanDescriptorSet &descriptorSet); + void pipelineBarrier(const VulkanImage2DList &image2DList, + VulkanImageLayout oldImageLayout, + VulkanImageLayout newImageLayout); + void dispatch(uint32_t groupCountX, uint32_t groupCountY, + uint32_t groupCountZ); + void fillBuffer(const VulkanBuffer &buffer, uint32_t data, + uint64_t offset = 0, uint64_t size = VK_WHOLE_SIZE); + void updateBuffer(const VulkanBuffer &buffer, void *pdata, + uint64_t offset = 0, uint64_t size = VK_WHOLE_SIZE); + void copyBufferToImage(const VulkanBuffer &buffer, const VulkanImage &image, + VulkanImageLayout imageLayout = + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + void copyBufferToImage(const VulkanBuffer &buffer, const VulkanImage &image, + uint64_t bufferOffset = 0, uint32_t mipLevel = 0, + uint32_t baseArrayLayer = 0, uint32_t layerCount = 1, + VulkanOffset3D offset3D = VulkanOffset3D(0, 0, 0), + VulkanExtent3D extent3D = VulkanExtent3D(0, 0, 0)); + void copyImageToBuffer(const VulkanImage &image, const VulkanBuffer &buffer, + uint64_t bufferOffset = 0, uint32_t mipLevel = 0, + uint32_t baseArrayLayer = 0, uint32_t layerCount = 1, + VulkanOffset3D offset3D = VulkanOffset3D(0, 0, 0), + VulkanExtent3D extent3D = VulkanExtent3D(0, 0, 
0)); + void end(); + operator VkCommandBuffer() const; +}; + +class VulkanBuffer { +protected: + const VulkanDevice &m_device; + VkBuffer m_vkBuffer; + uint64_t m_size; + uint64_t m_alignment; + VulkanMemoryTypeList m_memoryTypeList; + + VulkanBuffer(const VulkanBuffer &buffer); + +public: + VulkanBuffer(const VulkanDevice &device, uint64_t size, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanBufferUsage bufferUsage = + VULKAN_BUFFER_USAGE_STORAGE_BUFFER_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE, + const VulkanQueueFamilyList &queueFamilyList = + getEmptyVulkanQueueFamilyList()); + virtual ~VulkanBuffer(); + uint64_t getSize() const; + uint64_t getAlignment() const; + const VulkanMemoryTypeList &getMemoryTypeList() const; + operator VkBuffer() const; +}; + +class VulkanImage { +protected: + const VulkanDevice &m_device; + const VulkanImageType m_imageType; + const VulkanExtent3D m_extent3D; + const VulkanFormat m_format; + const uint32_t m_numMipLevels; + const uint32_t m_numLayers; + VkImage m_vkImage; + uint64_t m_size; + uint64_t m_alignment; + VulkanMemoryTypeList m_memoryTypeList; + VkImageCreateInfo VulkanImageCreateInfo; + VulkanImage(const VulkanImage &image); + +public: + VulkanImage( + const VulkanDevice &device, VulkanImageType imageType, + VulkanFormat format, const VulkanExtent3D &extent3D, + uint32_t numMipLevels = 1, uint32_t arrayLayers = 1, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanImageCreateFlag imageCreateFlags = VULKAN_IMAGE_CREATE_FLAG_NONE, + VulkanImageTiling imageTiling = VULKAN_IMAGE_TILING_OPTIMAL, + VulkanImageUsage imageUsage = + VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); + virtual ~VulkanImage(); + virtual VulkanExtent3D getExtent3D(uint32_t mipLevel = 0) const; + VulkanFormat getFormat() 
const; + uint32_t getNumMipLevels() const; + uint32_t getNumLayers() const; + uint64_t getSize() const; + uint64_t getAlignment() const; + const VulkanMemoryTypeList &getMemoryTypeList() const; + VkImageCreateInfo getVkImageCreateInfo() const; + operator VkImage() const; +}; + +class VulkanImage2D : public VulkanImage { +protected: + VkImageView m_vkImageView; + + VulkanImage2D(const VulkanImage2D &image2D); + +public: + VulkanImage2D( + const VulkanDevice &device, VulkanFormat format, uint32_t width, + uint32_t height, uint32_t numMipLevels = 1, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + VulkanImageCreateFlag imageCreateFlag = VULKAN_IMAGE_CREATE_FLAG_NONE, + VulkanImageUsage imageUsage = + VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST, + VulkanSharingMode sharingMode = VULKAN_SHARING_MODE_EXCLUSIVE); + virtual ~VulkanImage2D(); + virtual VulkanExtent3D getExtent3D(uint32_t mipLevel = 0) const; +}; + +class VulkanImageView { +protected: + const VulkanDevice &m_device; + VkImageView m_vkImageView; + + VulkanImageView(const VulkanImageView &imageView); + +public: + VulkanImageView(const VulkanDevice &device, const VulkanImage &image, + VulkanImageViewType imageViewType, + uint32_t baseMipLevel = 0, + uint32_t mipLevelCount = VULKAN_REMAINING_MIP_LEVELS, + uint32_t baseArrayLayer = 0, + uint32_t layerCount = VULKAN_REMAINING_ARRAY_LAYERS); + virtual ~VulkanImageView(); + operator VkImageView() const; +}; + +class VulkanDeviceMemory { +protected: + const VulkanDevice &m_device; + VkDeviceMemory m_vkDeviceMemory; + uint64_t m_size; + bool m_isDedicated; + + VulkanDeviceMemory(const VulkanDeviceMemory &deviceMemory); + +public: + VulkanDeviceMemory(const VulkanDevice &device, uint64_t size, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + const void *name = NULL); + VulkanDeviceMemory(const VulkanDevice 
&device, const VulkanImage &image, + const VulkanMemoryType &memoryType, + VulkanExternalMemoryHandleType externalMemoryHandleType = + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE, + const void *name = NULL); + virtual ~VulkanDeviceMemory(); + uint64_t getSize() const; +#ifdef _WIN32 + HANDLE + getHandle(VulkanExternalMemoryHandleType externalMemoryHandleType) const; +#else + int + getHandle(VulkanExternalMemoryHandleType externalMemoryHandleType) const; +#endif + bool isDedicated() const; + void *map(size_t offset = 0, size_t size = VK_WHOLE_SIZE); + void unmap(); + void bindBuffer(const VulkanBuffer &buffer, uint64_t offset = 0); + void bindImage(const VulkanImage &image, uint64_t offset = 0); + operator VkDeviceMemory() const; +}; + +class VulkanSemaphore { + friend class VulkanQueue; + +protected: + const VulkanDevice &m_device; + VkSemaphore m_vkSemaphore; + const std::wstring m_name; + + VulkanSemaphore(const VulkanSemaphore &semaphore); + +public: + VulkanSemaphore( + const VulkanDevice &device, + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType = + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE, + const std::wstring name = L""); + virtual ~VulkanSemaphore(); +#ifdef _WIN32 + HANDLE getHandle( + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const; +#else + int getHandle( + VulkanExternalSemaphoreHandleType externalSemaphoreHandleType) const; +#endif + const std::wstring &getName() const; + operator VkSemaphore() const; +}; + + +#define VK_FUNC_DECL(name) extern "C" PFN_##name _##name; +VK_FUNC_LIST +#if defined(_WIN32) || defined(_WIN64) +VK_WINDOWS_FUNC_LIST +#endif +#undef VK_FUNC_DECL + +#endif // _vulkan_wrapper_hpp_ diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp new file mode 100644 index 00000000..359bcae4 --- /dev/null +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper_types.hpp @@ -0,0 +1,463 @@ +// +// 
Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#ifndef _vulkan_wrapper_types_hpp_ +#define _vulkan_wrapper_types_hpp_ + +#include + +#define VULKAN_MIN_BUFFER_OFFSET_COPY_ALIGNMENT 4 +#define VULKAN_REMAINING_MIP_LEVELS VK_REMAINING_MIP_LEVELS +#define VULKAN_REMAINING_ARRAY_LAYERS VK_REMAINING_ARRAY_LAYERS + +class VulkanInstance; +class VulkanPhysicalDevice; +class VulkanMemoryHeap; +class VulkanMemoryType; +class VulkanQueueFamily; +class VulkanDevice; +class VulkanQueue; +class VulkanDescriptorSetLayoutBinding; +class VulkanDescriptorSetLayout; +class VulkanPipelineLayout; +class VulkanShaderModule; +class VulkanPipeline; +class VulkanComputePipeline; +class VulkanDescriptorPool; +class VulkanDescriptorSet; +class VulkanCommandPool; +class VulkanCommandBuffer; +class VulkanBuffer; +class VulkanOffset3D; +class VulkanExtent3D; +class VulkanImage; +class VulkanImage2D; +class VulkanImageView; +class VulkanDeviceMemory; +class VulkanSemaphore; + +class VulkanPhysicalDeviceList; +class VulkanMemoryHeapList; +class VulkanMemoryTypeList; +class VulkanQueueFamilyList; +class VulkanQueueFamilyToQueueCountMap; +class VulkanQueueFamilyToQueueListMap; +class VulkanQueueList; +class VulkanCommandBufferList; +class VulkanDescriptorSetLayoutList; +class VulkanBufferList; +class VulkanImage2DList; +class VulkanImageViewList; +class VulkanDeviceMemoryList; +class VulkanSemaphoreList; + +enum VulkanQueueFlag +{ + 
VULKAN_QUEUE_FLAG_GRAPHICS = VK_QUEUE_GRAPHICS_BIT, + VULKAN_QUEUE_FLAG_COMPUTE = VK_QUEUE_COMPUTE_BIT, + VULKAN_QUEUE_FLAG_TRANSFER = VK_QUEUE_TRANSFER_BIT, + VULKAN_QUEUE_FLAG_MASK_ALL = VULKAN_QUEUE_FLAG_GRAPHICS + | VULKAN_QUEUE_FLAG_COMPUTE | VULKAN_QUEUE_FLAG_TRANSFER +}; + +enum VulkanDescriptorType +{ + VULKAN_DESCRIPTOR_TYPE_SAMPLER = VK_DESCRIPTOR_TYPE_SAMPLER, + VULKAN_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER = + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + VULKAN_DESCRIPTOR_TYPE_SAMPLED_IMAGE = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, + VULKAN_DESCRIPTOR_TYPE_STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, + VULKAN_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER = + VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, + VULKAN_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER = + VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, + VULKAN_DESCRIPTOR_TYPE_UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + VULKAN_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC, + VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC = + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC, + VULKAN_DESCRIPTOR_TYPE_INPUT_ATTACHMENT = + VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, +}; + +enum VulkanShaderStage +{ + VULKAN_SHADER_STAGE_VERTEX = VK_SHADER_STAGE_VERTEX_BIT, + VULKAN_SHADER_STAGE_FRAGMENT = VK_SHADER_STAGE_FRAGMENT_BIT, + VULKAN_SHADER_STAGE_COMPUTE = VK_SHADER_STAGE_COMPUTE_BIT, + VULKAN_SHADER_STAGE_ALL_GRAPHICS = VK_SHADER_STAGE_ALL_GRAPHICS, + VULKAN_SHADER_STAGE_ALL = VK_SHADER_STAGE_ALL +}; + +enum VulkanPipelineBindPoint +{ + VULKAN_PIPELINE_BIND_POINT_GRAPHICS = VK_PIPELINE_BIND_POINT_GRAPHICS, + VULKAN_PIPELINE_BIND_POINT_COMPUTE = VK_PIPELINE_BIND_POINT_COMPUTE +}; + +enum VulkanMemoryTypeProperty +{ + VULKAN_MEMORY_TYPE_PROPERTY_NONE = 0, + VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + 
| VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_CACHED_COHERENT = + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_COHERENT = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + VULKAN_MEMORY_TYPE_PROPERTY_DEVICE_LOCAL_HOST_VISIBLE_CACHED_COHERENT = + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT + | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_CACHED_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT +}; + +enum VulkanMemoryHeapFlag +{ + VULKAN_MEMORY_HEAP_FLAG_NONE = 0, + VULKAN_MEMORY_HEAP_FLAG_DEVICE_LOCAL = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT +}; + +enum VulkanExternalMemoryHandleType +{ + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_NONE = 0, + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR, + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR, + VULKAN_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR + | VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR +}; + +enum VulkanExternalSemaphoreHandleType +{ + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NONE = 0, + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT = + 
VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR, + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR, + VULKAN_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_NT_KMT = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR + | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_KHR +}; + +enum VulkanBufferUsage +{ + VULKAN_BUFFER_USAGE_TRANSFER_SRC = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VULKAN_BUFFER_USAGE_TRANSFER_DST = VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VULKAN_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER = + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, + VULKAN_BUFFER_USAGE_STORAGE_TEXEL_BUFFER = + VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, + VULKAN_BUFFER_USAGE_UNIFORM_BUFFER = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VULKAN_BUFFER_USAGE_STORAGE_BUFFER = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VULKAN_BUFFER_USAGE_INDEX_BUFFER = VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VULKAN_BUFFER_USAGE_VERTEX_BUFFER = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + VULKAN_BUFFER_USAGE_INDIRECT_BUFFER = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, + VULKAN_BUFFER_USAGE_STORAGE_BUFFER_TRANSFER_SRC_DST = + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT + | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VULKAN_BUFFER_USAGE_UNIFORM_BUFFER_TRANSFER_SRC_DST = + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT + | VK_BUFFER_USAGE_TRANSFER_DST_BIT, +}; + +enum VulkanSharingMode +{ + VULKAN_SHARING_MODE_EXCLUSIVE = VK_SHARING_MODE_EXCLUSIVE, + VULKAN_SHARING_MODE_CONCURRENT = VK_SHARING_MODE_CONCURRENT +}; + +enum VulkanImageType +{ + VULKAN_IMAGE_TYPE_1D = VK_IMAGE_TYPE_1D, + VULKAN_IMAGE_TYPE_2D = VK_IMAGE_TYPE_2D, + VULKAN_IMAGE_TYPE_3D = VK_IMAGE_TYPE_3D +}; + +enum VulkanFormat +{ + VULKAN_FORMAT_UNDEFINED = VK_FORMAT_UNDEFINED, + VULKAN_FORMAT_R4G4_UNORM_PACK8 = VK_FORMAT_R4G4_UNORM_PACK8, + VULKAN_FORMAT_R4G4B4A4_UNORM_PACK16 = VK_FORMAT_R4G4B4A4_UNORM_PACK16, + VULKAN_FORMAT_B4G4R4A4_UNORM_PACK16 = 
VK_FORMAT_B4G4R4A4_UNORM_PACK16, + VULKAN_FORMAT_R5G6B5_UNORM_PACK16 = VK_FORMAT_R5G6B5_UNORM_PACK16, + VULKAN_FORMAT_B5G6R5_UNORM_PACK16 = VK_FORMAT_B5G6R5_UNORM_PACK16, + VULKAN_FORMAT_R5G5B5A1_UNORM_PACK16 = VK_FORMAT_R5G5B5A1_UNORM_PACK16, + VULKAN_FORMAT_B5G5R5A1_UNORM_PACK16 = VK_FORMAT_B5G5R5A1_UNORM_PACK16, + VULKAN_FORMAT_A1R5G5B5_UNORM_PACK16 = VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VULKAN_FORMAT_R8_UNORM = VK_FORMAT_R8_UNORM, + VULKAN_FORMAT_R8_SNORM = VK_FORMAT_R8_SNORM, + VULKAN_FORMAT_R8_USCALED = VK_FORMAT_R8_USCALED, + VULKAN_FORMAT_R8_SSCALED = VK_FORMAT_R8_SSCALED, + VULKAN_FORMAT_R8_UINT = VK_FORMAT_R8_UINT, + VULKAN_FORMAT_R8_SINT = VK_FORMAT_R8_SINT, + VULKAN_FORMAT_R8_SRGB = VK_FORMAT_R8_SRGB, + VULKAN_FORMAT_R8G8_SNORM = VK_FORMAT_R8G8_SNORM, + VULKAN_FORMAT_R8G8_UNORM = VK_FORMAT_R8G8_UNORM, + VULKAN_FORMAT_R8G8_USCALED = VK_FORMAT_R8G8_USCALED, + VULKAN_FORMAT_R8G8_SSCALED = VK_FORMAT_R8G8_SSCALED, + VULKAN_FORMAT_R8G8_UINT = VK_FORMAT_R8G8_UINT, + VULKAN_FORMAT_R8G8_SINT = VK_FORMAT_R8G8_SINT, + VULKAN_FORMAT_R8G8_SRGB = VK_FORMAT_R8G8_SRGB, + VULKAN_FORMAT_R8G8B8_UNORM = VK_FORMAT_R8G8B8_UNORM, + VULKAN_FORMAT_R8G8B8_SNORM = VK_FORMAT_R8G8B8_SNORM, + VULKAN_FORMAT_R8G8B8_USCALED = VK_FORMAT_R8G8B8_USCALED, + VULKAN_FORMAT_R8G8B8_SSCALED = VK_FORMAT_R8G8B8_SSCALED, + VULKAN_FORMAT_R8G8B8_UINT = VK_FORMAT_R8G8B8_UINT, + VULKAN_FORMAT_R8G8B8_SINT = VK_FORMAT_R8G8B8_SINT, + VULKAN_FORMAT_R8G8B8_SRGB = VK_FORMAT_R8G8B8_SRGB, + VULKAN_FORMAT_B8G8R8_UNORM = VK_FORMAT_B8G8R8_UNORM, + VULKAN_FORMAT_B8G8R8_SNORM = VK_FORMAT_B8G8R8_SNORM, + VULKAN_FORMAT_B8G8R8_USCALED = VK_FORMAT_B8G8R8_USCALED, + VULKAN_FORMAT_B8G8R8_SSCALED = VK_FORMAT_B8G8R8_SSCALED, + VULKAN_FORMAT_B8G8R8_UINT = VK_FORMAT_B8G8R8_UINT, + VULKAN_FORMAT_B8G8R8_SINT = VK_FORMAT_B8G8R8_SINT, + VULKAN_FORMAT_B8G8R8_SRGB = VK_FORMAT_B8G8R8_SRGB, + VULKAN_FORMAT_R8G8B8A8_UNORM = VK_FORMAT_R8G8B8A8_UNORM, + VULKAN_FORMAT_R8G8B8A8_SNORM = VK_FORMAT_R8G8B8A8_SNORM, + 
VULKAN_FORMAT_R8G8B8A8_USCALED = VK_FORMAT_R8G8B8A8_USCALED, + VULKAN_FORMAT_R8G8B8A8_SSCALED = VK_FORMAT_R8G8B8A8_SSCALED, + VULKAN_FORMAT_R8G8B8A8_UINT = VK_FORMAT_R8G8B8A8_UINT, + VULKAN_FORMAT_R8G8B8A8_SINT = VK_FORMAT_R8G8B8A8_SINT, + VULKAN_FORMAT_R8G8B8A8_SRGB = VK_FORMAT_R8G8B8A8_SRGB, + VULKAN_FORMAT_B8G8R8A8_UNORM = VK_FORMAT_B8G8R8A8_UNORM, + VULKAN_FORMAT_B8G8R8A8_SNORM = VK_FORMAT_B8G8R8A8_SNORM, + VULKAN_FORMAT_B8G8R8A8_USCALED = VK_FORMAT_B8G8R8A8_USCALED, + VULKAN_FORMAT_B8G8R8A8_SSCALED = VK_FORMAT_B8G8R8A8_SSCALED, + VULKAN_FORMAT_B8G8R8A8_UINT = VK_FORMAT_B8G8R8A8_UINT, + VULKAN_FORMAT_B8G8R8A8_SINT = VK_FORMAT_B8G8R8A8_SINT, + VULKAN_FORMAT_B8G8R8A8_SRGB = VK_FORMAT_B8G8R8A8_SRGB, + VULKAN_FORMAT_A8B8G8R8_UNORM_PACK32 = VK_FORMAT_A8B8G8R8_UNORM_PACK32, + VULKAN_FORMAT_A8B8G8R8_SNORM_PACK32 = VK_FORMAT_A8B8G8R8_SNORM_PACK32, + VULKAN_FORMAT_A8B8G8R8_USCALED_PACK32 = VK_FORMAT_A8B8G8R8_USCALED_PACK32, + VULKAN_FORMAT_A8B8G8R8_SSCALED_PACK32 = VK_FORMAT_A8B8G8R8_SSCALED_PACK32, + VULKAN_FORMAT_A8B8G8R8_UINT_PACK32 = VK_FORMAT_A8B8G8R8_UINT_PACK32, + VULKAN_FORMAT_A8B8G8R8_SINT_PACK32 = VK_FORMAT_A8B8G8R8_SINT_PACK32, + VULKAN_FORMAT_A8B8G8R8_SRGB_PACK32 = VK_FORMAT_A8B8G8R8_SRGB_PACK32, + VULKAN_FORMAT_A2R10G10B10_UNORM_PACK32 = VK_FORMAT_A2R10G10B10_UNORM_PACK32, + VULKAN_FORMAT_A2R10G10B10_SNORM_PACK32 = VK_FORMAT_A2R10G10B10_SNORM_PACK32, + VULKAN_FORMAT_A2R10G10B10_USCALED_PACK32 = + VK_FORMAT_A2R10G10B10_USCALED_PACK32, + VULKAN_FORMAT_A2R10G10B10_SSCALED_PACK32 = + VK_FORMAT_A2R10G10B10_SSCALED_PACK32, + VULKAN_FORMAT_A2R10G10B10_UINT_PACK32 = VK_FORMAT_A2R10G10B10_UINT_PACK32, + VULKAN_FORMAT_A2R10G10B10_SINT_PACK32 = VK_FORMAT_A2R10G10B10_SINT_PACK32, + VULKAN_FORMAT_A2B10G10R10_UNORM_PACK32 = VK_FORMAT_A2B10G10R10_UNORM_PACK32, + VULKAN_FORMAT_A2B10G10R10_SNORM_PACK32 = VK_FORMAT_A2B10G10R10_SNORM_PACK32, + VULKAN_FORMAT_A2B10G10R10_USCALED_PACK32 = + VK_FORMAT_A2B10G10R10_USCALED_PACK32, + VULKAN_FORMAT_A2B10G10R10_SSCALED_PACK32 = + 
VK_FORMAT_A2B10G10R10_SSCALED_PACK32, + VULKAN_FORMAT_A2B10G10R10_UINT_PACK32 = VK_FORMAT_A2B10G10R10_UINT_PACK32, + VULKAN_FORMAT_A2B10G10R10_SINT_PACK32 = VK_FORMAT_A2B10G10R10_SINT_PACK32, + VULKAN_FORMAT_R16_UNORM = VK_FORMAT_R16_UNORM, + VULKAN_FORMAT_R16_SNORM = VK_FORMAT_R16_SNORM, + VULKAN_FORMAT_R16_USCALED = VK_FORMAT_R16_USCALED, + VULKAN_FORMAT_R16_SSCALED = VK_FORMAT_R16_SSCALED, + VULKAN_FORMAT_R16_UINT = VK_FORMAT_R16_UINT, + VULKAN_FORMAT_R16_SINT = VK_FORMAT_R16_SINT, + VULKAN_FORMAT_R16_SFLOAT = VK_FORMAT_R16_SFLOAT, + VULKAN_FORMAT_R16G16_UNORM = VK_FORMAT_R16G16_UNORM, + VULKAN_FORMAT_R16G16_SNORM = VK_FORMAT_R16G16_SNORM, + VULKAN_FORMAT_R16G16_USCALED = VK_FORMAT_R16G16_USCALED, + VULKAN_FORMAT_R16G16_SSCALED = VK_FORMAT_R16G16_SSCALED, + VULKAN_FORMAT_R16G16_UINT = VK_FORMAT_R16G16_UINT, + VULKAN_FORMAT_R16G16_SINT = VK_FORMAT_R16G16_SINT, + VULKAN_FORMAT_R16G16_SFLOAT = VK_FORMAT_R16G16_SFLOAT, + VULKAN_FORMAT_R16G16B16_UNORM = VK_FORMAT_R16G16B16_UNORM, + VULKAN_FORMAT_R16G16B16_SNORM = VK_FORMAT_R16G16B16_SNORM, + VULKAN_FORMAT_R16G16B16_USCALED = VK_FORMAT_R16G16B16_USCALED, + VULKAN_FORMAT_R16G16B16_SSCALED = VK_FORMAT_R16G16B16_SSCALED, + VULKAN_FORMAT_R16G16B16_UINT = VK_FORMAT_R16G16B16_UINT, + VULKAN_FORMAT_R16G16B16_SINT = VK_FORMAT_R16G16B16_SINT, + VULKAN_FORMAT_R16G16B16_SFLOAT = VK_FORMAT_R16G16B16_SFLOAT, + VULKAN_FORMAT_R16G16B16A16_UNORM = VK_FORMAT_R16G16B16A16_UNORM, + VULKAN_FORMAT_R16G16B16A16_SNORM = VK_FORMAT_R16G16B16A16_SNORM, + VULKAN_FORMAT_R16G16B16A16_USCALED = VK_FORMAT_R16G16B16A16_USCALED, + VULKAN_FORMAT_R16G16B16A16_SSCALED = VK_FORMAT_R16G16B16A16_SSCALED, + VULKAN_FORMAT_R16G16B16A16_UINT = VK_FORMAT_R16G16B16A16_UINT, + VULKAN_FORMAT_R16G16B16A16_SINT = VK_FORMAT_R16G16B16A16_SINT, + VULKAN_FORMAT_R16G16B16A16_SFLOAT = VK_FORMAT_R16G16B16A16_SFLOAT, + VULKAN_FORMAT_R32_UINT = VK_FORMAT_R32_UINT, + VULKAN_FORMAT_R32_SINT = VK_FORMAT_R32_SINT, + VULKAN_FORMAT_R32_SFLOAT = VK_FORMAT_R32_SFLOAT, + 
VULKAN_FORMAT_R32G32_UINT = VK_FORMAT_R32G32_UINT, + VULKAN_FORMAT_R32G32_SINT = VK_FORMAT_R32G32_SINT, + VULKAN_FORMAT_R32G32_SFLOAT = VK_FORMAT_R32G32_SFLOAT, + VULKAN_FORMAT_R32G32B32_UINT = VK_FORMAT_R32G32B32_UINT, + VULKAN_FORMAT_R32G32B32_SINT = VK_FORMAT_R32G32B32_SINT, + VULKAN_FORMAT_R32G32B32_SFLOAT = VK_FORMAT_R32G32B32_SFLOAT, + VULKAN_FORMAT_R32G32B32A32_UINT = VK_FORMAT_R32G32B32A32_UINT, + VULKAN_FORMAT_R32G32B32A32_SINT = VK_FORMAT_R32G32B32A32_SINT, + VULKAN_FORMAT_R32G32B32A32_SFLOAT = VK_FORMAT_R32G32B32A32_SFLOAT, + VULKAN_FORMAT_R64_UINT = VK_FORMAT_R64_UINT, + VULKAN_FORMAT_R64_SINT = VK_FORMAT_R64_SINT, + VULKAN_FORMAT_R64_SFLOAT = VK_FORMAT_R64_SFLOAT, + VULKAN_FORMAT_R64G64_UINT = VK_FORMAT_R64G64_UINT, + VULKAN_FORMAT_R64G64_SINT = VK_FORMAT_R64G64_SINT, + VULKAN_FORMAT_R64G64_SFLOAT = VK_FORMAT_R64G64_SFLOAT, + VULKAN_FORMAT_R64G64B64_UINT = VK_FORMAT_R64G64B64_UINT, + VULKAN_FORMAT_R64G64B64_SINT = VK_FORMAT_R64G64B64_SINT, + VULKAN_FORMAT_R64G64B64_SFLOAT = VK_FORMAT_R64G64B64_SFLOAT, + VULKAN_FORMAT_R64G64B64A64_UINT = VK_FORMAT_R64G64B64A64_UINT, + VULKAN_FORMAT_R64G64B64A64_SINT = VK_FORMAT_R64G64B64A64_SINT, + VULKAN_FORMAT_R64G64B64A64_SFLOAT = VK_FORMAT_R64G64B64A64_SFLOAT, + VULKAN_FORMAT_B10G11R11_UFLOAT_PACK32 = VK_FORMAT_B10G11R11_UFLOAT_PACK32, + VULKAN_FORMAT_E5B9G9R9_UFLOAT_PACK32 = VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, + VULKAN_FORMAT_D16_UNORM = VK_FORMAT_D16_UNORM, + VULKAN_FORMAT_X8_D24_UNORM_PACK32 = VK_FORMAT_X8_D24_UNORM_PACK32, + VULKAN_FORMAT_D32_SFLOAT = VK_FORMAT_D32_SFLOAT, + VULKAN_FORMAT_S8_UINT = VK_FORMAT_S8_UINT, + VULKAN_FORMAT_D16_UNORM_S8_UINT = VK_FORMAT_D16_UNORM_S8_UINT, + VULKAN_FORMAT_D24_UNORM_S8_UINT = VK_FORMAT_D24_UNORM_S8_UINT, + VULKAN_FORMAT_D32_SFLOAT_S8_UINT = VK_FORMAT_D32_SFLOAT_S8_UINT, + VULKAN_FORMAT_BC1_RGB_UNORM_BLOCK = VK_FORMAT_BC1_RGB_UNORM_BLOCK, + VULKAN_FORMAT_BC1_RGB_SRGB_BLOCK = VK_FORMAT_BC1_RGB_SRGB_BLOCK, + VULKAN_FORMAT_BC1_RGBA_UNORM_BLOCK = VK_FORMAT_BC1_RGBA_UNORM_BLOCK, 
+ VULKAN_FORMAT_BC1_RGBA_SRGB_BLOCK = VK_FORMAT_BC1_RGBA_SRGB_BLOCK, + VULKAN_FORMAT_BC2_UNORM_BLOCK = VK_FORMAT_BC2_UNORM_BLOCK, + VULKAN_FORMAT_BC2_SRGB_BLOCK = VK_FORMAT_BC2_SRGB_BLOCK, + VULKAN_FORMAT_BC3_UNORM_BLOCK = VK_FORMAT_BC3_UNORM_BLOCK, + VULKAN_FORMAT_BC3_SRGB_BLOCK = VK_FORMAT_BC3_SRGB_BLOCK, + VULKAN_FORMAT_BC4_UNORM_BLOCK = VK_FORMAT_BC4_UNORM_BLOCK, + VULKAN_FORMAT_BC4_SNORM_BLOCK = VK_FORMAT_BC4_SNORM_BLOCK, + VULKAN_FORMAT_BC5_UNORM_BLOCK = VK_FORMAT_BC5_UNORM_BLOCK, + VULKAN_FORMAT_BC5_SNORM_BLOCK = VK_FORMAT_BC5_SNORM_BLOCK, + VULKAN_FORMAT_BC6H_UFLOAT_BLOCK = VK_FORMAT_BC6H_UFLOAT_BLOCK, + VULKAN_FORMAT_BC6H_SFLOAT_BLOCK = VK_FORMAT_BC6H_SFLOAT_BLOCK, + VULKAN_FORMAT_BC7_UNORM_BLOCK = VK_FORMAT_BC7_UNORM_BLOCK, + VULKAN_FORMAT_BC7_SRGB_BLOCK = VK_FORMAT_BC7_SRGB_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8_UNORM_BLOCK = VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK = + VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK = + VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK, + VULKAN_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK = VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK, + VULKAN_FORMAT_EAC_R11_UNORM_BLOCK = VK_FORMAT_EAC_R11_UNORM_BLOCK, + VULKAN_FORMAT_EAC_R11_SNORM_BLOCK = VK_FORMAT_EAC_R11_SNORM_BLOCK, + VULKAN_FORMAT_EAC_R11G11_UNORM_BLOCK = VK_FORMAT_EAC_R11G11_UNORM_BLOCK, + VULKAN_FORMAT_EAC_R11G11_SNORM_BLOCK = VK_FORMAT_EAC_R11G11_SNORM_BLOCK, + VULKAN_FORMAT_ASTC_4x4_UNORM_BLOCK = VK_FORMAT_ASTC_4x4_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_4x4_SRGB_BLOCK = VK_FORMAT_ASTC_4x4_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_5x4_UNORM_BLOCK = VK_FORMAT_ASTC_5x4_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_5x4_SRGB_BLOCK = VK_FORMAT_ASTC_5x4_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_5x5_UNORM_BLOCK = VK_FORMAT_ASTC_5x5_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_5x5_SRGB_BLOCK = VK_FORMAT_ASTC_5x5_SRGB_BLOCK, + 
VULKAN_FORMAT_ASTC_6x5_UNORM_BLOCK = VK_FORMAT_ASTC_6x5_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_6x5_SRGB_BLOCK = VK_FORMAT_ASTC_6x5_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_6x6_UNORM_BLOCK = VK_FORMAT_ASTC_6x6_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_6x6_SRGB_BLOCK = VK_FORMAT_ASTC_6x6_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_8x5_UNORM_BLOCK = VK_FORMAT_ASTC_8x5_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_8x5_SRGB_BLOCK = VK_FORMAT_ASTC_8x5_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_8x6_UNORM_BLOCK = VK_FORMAT_ASTC_8x6_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_8x6_SRGB_BLOCK = VK_FORMAT_ASTC_8x6_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_8x8_UNORM_BLOCK = VK_FORMAT_ASTC_8x8_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_8x8_SRGB_BLOCK = VK_FORMAT_ASTC_8x8_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_10x5_UNORM_BLOCK = VK_FORMAT_ASTC_10x5_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_10x5_SRGB_BLOCK = VK_FORMAT_ASTC_10x5_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_10x6_UNORM_BLOCK = VK_FORMAT_ASTC_10x6_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_10x6_SRGB_BLOCK = VK_FORMAT_ASTC_10x6_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_10x8_UNORM_BLOCK = VK_FORMAT_ASTC_10x8_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_10x8_SRGB_BLOCK = VK_FORMAT_ASTC_10x8_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_10x10_UNORM_BLOCK = VK_FORMAT_ASTC_10x10_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_10x10_SRGB_BLOCK = VK_FORMAT_ASTC_10x10_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_12x10_UNORM_BLOCK = VK_FORMAT_ASTC_12x10_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_12x10_SRGB_BLOCK = VK_FORMAT_ASTC_12x10_SRGB_BLOCK, + VULKAN_FORMAT_ASTC_12x12_UNORM_BLOCK = VK_FORMAT_ASTC_12x12_UNORM_BLOCK, + VULKAN_FORMAT_ASTC_12x12_SRGB_BLOCK = VK_FORMAT_ASTC_12x12_SRGB_BLOCK, +}; + +enum VulkanImageLayout +{ + VULKAN_IMAGE_LAYOUT_UNDEFINED = VK_IMAGE_LAYOUT_UNDEFINED, + VULKAN_IMAGE_LAYOUT_GENERAL = VK_IMAGE_LAYOUT_GENERAL, + VULKAN_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL = + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VULKAN_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL = + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, +}; + +enum VulkanImageUsage +{ + VULKAN_IMAGE_USAGE_TRANSFER_SRC = VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + 
VULKAN_IMAGE_USAGE_TRANSFER_DST = VK_IMAGE_USAGE_TRANSFER_DST_BIT, + VULKAN_IMAGE_USAGE_SAMPLED = VK_IMAGE_USAGE_SAMPLED_BIT, + VULKAN_IMAGE_USAGE_STORAGE = VK_IMAGE_USAGE_STORAGE_BIT, + VULKAN_IMAGE_USAGE_COLOR_ATTACHMENT = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VULKAN_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT = + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VULKAN_IMAGE_USAGE_TRANSIENT_ATTACHMENT = + VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT, + VULKAN_IMAGE_USAGE_INPUT_ATTACHMENT = VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT, + VULKAN_IMAGE_USAGE_TRANSFER_SRC_DST = + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, + VULKAN_IMAGE_USAGE_STORAGE_TRANSFER_SRC_DST = VULKAN_IMAGE_USAGE_STORAGE + | VULKAN_IMAGE_USAGE_TRANSFER_SRC | VULKAN_IMAGE_USAGE_TRANSFER_DST, + VULKAN_IMAGE_USAGE_SAMPLED_STORAGE_TRANSFER_SRC_DST = + VK_IMAGE_USAGE_SAMPLED_BIT | VULKAN_IMAGE_USAGE_STORAGE + | VULKAN_IMAGE_USAGE_TRANSFER_SRC | VULKAN_IMAGE_USAGE_TRANSFER_DST +}; + +enum VulkanImageTiling +{ + VULKAN_IMAGE_TILING_OPTIMAL = VK_IMAGE_TILING_OPTIMAL, + VULKAN_IMAGE_TILING_LINEAR = VK_IMAGE_TILING_LINEAR +}; + +enum VulkanImageCreateFlag +{ + VULKAN_IMAGE_CREATE_FLAG_NONE = 0, + VULKAN_IMAGE_CREATE_FLAG_MUTABLE_FORMAT = + VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT, + VULKAN_IMAGE_CREATE_FLAG_CUBE_COMPATIBLE = + VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT, + VULKAN_IMAGE_CREATE_FLAG_CUBE_COMPATIBLE_MUTABLE_FORMAT = + VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT | VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT +}; + +enum VulkanImageViewType +{ + VULKAN_IMAGE_VIEW_TYPE_1D = VK_IMAGE_VIEW_TYPE_1D, + VULKAN_IMAGE_VIEW_TYPE_2D = VK_IMAGE_VIEW_TYPE_2D, + VULKAN_IMAGE_VIEW_TYPE_3D = VK_IMAGE_VIEW_TYPE_3D, + VULKAN_IMAGE_VIEW_TYPE_CUBE = VK_IMAGE_VIEW_TYPE_CUBE, + VULKAN_IMAGE_VIEW_TYPE_1D_ARRAY = VK_IMAGE_VIEW_TYPE_1D_ARRAY, + VULKAN_IMAGE_VIEW_TYPE_2D_ARRAY = VK_IMAGE_VIEW_TYPE_2D_ARRAY, + VULKAN_IMAGE_VIEW_TYPE_CUBE_ARRAY = VK_IMAGE_VIEW_TYPE_CUBE_ARRAY, +}; + +#endif // _vulkan_wrapper_types_hpp_ -- cgit v1.2.3 From 
1c19a4cbdbcaa9d8a683fed26d883735742b41c9 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 28 Jun 2022 17:05:11 +0100 Subject: Add tests for cl_khr_subgroup_rotate (#1439) Signed-off-by: Stuart Brady --- test_conformance/subgroups/CMakeLists.txt | 1 + test_conformance/subgroups/main.cpp | 3 +- test_conformance/subgroups/procs.h | 4 + .../subgroups/subgroup_common_templates.h | 35 ++++++- test_conformance/subgroups/subhelpers.h | 6 +- .../subgroups/test_subgroup_rotate.cpp | 109 +++++++++++++++++++++ 6 files changed, 155 insertions(+), 3 deletions(-) create mode 100644 test_conformance/subgroups/test_subgroup_rotate.cpp diff --git a/test_conformance/subgroups/CMakeLists.txt b/test_conformance/subgroups/CMakeLists.txt index d48af9cc..1ff249cf 100644 --- a/test_conformance/subgroups/CMakeLists.txt +++ b/test_conformance/subgroups/CMakeLists.txt @@ -15,6 +15,7 @@ set(${MODULE_NAME}_SOURCES test_subgroup_clustered_reduce.cpp test_subgroup_shuffle.cpp test_subgroup_shuffle_relative.cpp + test_subgroup_rotate.cpp ) include(../CMakeCommon.txt) diff --git a/test_conformance/subgroups/main.cpp b/test_conformance/subgroups/main.cpp index ebe94558..a3ae910d 100644 --- a/test_conformance/subgroups/main.cpp +++ b/test_conformance/subgroups/main.cpp @@ -41,7 +41,8 @@ test_definition test_list[] = { ADD_TEST(subgroup_functions_ballot), ADD_TEST(subgroup_functions_clustered_reduce), ADD_TEST(subgroup_functions_shuffle), - ADD_TEST(subgroup_functions_shuffle_relative) + ADD_TEST(subgroup_functions_shuffle_relative), + ADD_TEST(subgroup_functions_rotate), }; const int test_num = ARRAY_SIZE(test_list); diff --git a/test_conformance/subgroups/procs.h b/test_conformance/subgroups/procs.h index d09e8242..d4f51bec 100644 --- a/test_conformance/subgroups/procs.h +++ b/test_conformance/subgroups/procs.h @@ -81,4 +81,8 @@ extern int test_subgroup_functions_shuffle_relative(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements); +extern int 
test_subgroup_functions_rotate(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements); #endif /*_procs_h*/ diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 0ffa46c8..5051f2e9 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -501,7 +501,31 @@ template struct SHF l = (((cl_uint)(genrand_int32(gMTdata) & 0x7fffffff) + 1) % (ns * 2 + 1)) - 1; - m[midx] = l; + switch (operation) + { + case ShuffleOp::shuffle: + case ShuffleOp::shuffle_xor: + case ShuffleOp::shuffle_up: + case ShuffleOp::shuffle_down: + // storing information about shuffle index/delta + m[midx] = (cl_int)l; + break; + case ShuffleOp::rotate: + case ShuffleOp::clustered_rotate: + // Storing information about rotate delta. + // The delta must be the same for each thread in + // the subgroup. + if (i == 0) + { + m[midx] = (cl_int)l; + } + else + { + m[midx] = m[midx - 4]; + } + break; + default: break; + } cl_ulong number = genrand_int64(gMTdata); set_value(t[ii + i], number); } @@ -565,6 +589,15 @@ template struct SHF if (l >= ns) skip = true; tr_idx = i + l; break; + // rotate - treat l as delta + case ShuffleOp::rotate: + tr_idx = (i + l) % test_params.subgroup_size; + break; + case ShuffleOp::clustered_rotate: { + tr_idx = ((i & ~(test_params.cluster_size - 1)) + + ((i + l) % test_params.cluster_size)); + break; + } default: break; } diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 12704db8..a305639a 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -251,7 +251,9 @@ enum class ShuffleOp shuffle, shuffle_up, shuffle_down, - shuffle_xor + shuffle_xor, + rotate, + clustered_rotate, }; enum class ArithmeticOp @@ -317,6 +319,8 @@ static const char *const operation_names(ShuffleOp operation) case 
ShuffleOp::shuffle_up: return "shuffle_up"; case ShuffleOp::shuffle_down: return "shuffle_down"; case ShuffleOp::shuffle_xor: return "shuffle_xor"; + case ShuffleOp::rotate: return "rotate"; + case ShuffleOp::clustered_rotate: return "clustered_rotate"; default: log_error("Unknown operation request"); break; } return ""; diff --git a/test_conformance/subgroups/test_subgroup_rotate.cpp b/test_conformance/subgroups/test_subgroup_rotate.cpp new file mode 100644 index 00000000..db0f48eb --- /dev/null +++ b/test_conformance/subgroups/test_subgroup_rotate.cpp @@ -0,0 +1,109 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" +#include "subhelpers.h" +#include "subgroup_common_kernels.h" +#include "subgroup_common_templates.h" +#include "harness/conversions.h" +#include "harness/typeWrappers.h" + +namespace { + +template int run_rotate_for_type(RunTestForType rft) +{ + int error = rft.run_impl>("sub_group_rotate"); + return error; +} + +std::string sub_group_clustered_rotate_source = R"( + __kernel void test_%s(const __global Type *in, __global int4 *xy, __global Type *out, + uint cluster_size) { + Type r; + int gid = get_global_id(0); + XY(xy,gid); + Type x = in[gid]; + int delta = xy[gid].z; + switch (cluster_size) { + case 1: r = %s(x, delta, 1); break; + case 2: r = %s(x, delta, 2); break; + case 4: r = %s(x, delta, 4); break; + case 8: r = %s(x, delta, 8); break; + case 16: r = %s(x, delta, 16); break; + case 32: r = %s(x, delta, 32); break; + case 64: r = %s(x, delta, 64); break; + case 128: r = %s(x, delta, 128); break; + } + out[gid] = r; + } +)"; + +template int run_clustered_rotate_for_type(RunTestForType rft) +{ + int error = rft.run_impl>( + "sub_group_clustered_rotate"); + return error; +} + +} + +int test_subgroup_functions_rotate(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + if (!is_extension_available(device, "cl_khr_subgroup_rotate")) + { + log_info("cl_khr_subgroup_rotate is not supported on this device, " + "skipping test.\n"); + return TEST_SKIPPED_ITSELF; + } + + constexpr size_t global_work_size = 2000; + constexpr size_t local_work_size = 200; + WorkGroupParams test_params(global_work_size, local_work_size); + test_params.save_kernel_source(sub_group_generic_source); + RunTestForType rft(device, context, queue, num_elements, test_params); + + int error = run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); 
+ error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + error |= run_rotate_for_type(rft); + + WorkGroupParams test_params_clustered(global_work_size, local_work_size, -1, + 3); + test_params_clustered.save_kernel_source(sub_group_clustered_rotate_source); + RunTestForType rft_clustered(device, context, queue, num_elements, + test_params_clustered); + + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + error |= run_clustered_rotate_for_type(rft_clustered); + + return error; +} -- cgit v1.2.3 From e3e178676168c171b6d005403c0f1f408b6b4f29 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 1 Jul 2022 15:38:42 +0100 Subject: Fix newline in sample_image_pixel_float_offset log (#1446) --- test_common/harness/imageHelpers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index a254c48f..3dbdffa0 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -1994,7 +1994,7 @@ FloatPixel sample_image_pixel_float_offset( break; case CL_MEM_OBJECT_IMAGE1D: case CL_MEM_OBJECT_IMAGE1D_BUFFER: - log_info("Starting coordinate: %f\b", x); + log_info("Starting coordinate: %f\n", x); break; case CL_MEM_OBJECT_IMAGE2D: log_info("Starting coordinate: %f, %f\n", x, y); -- cgit v1.2.3 From 8d9d1f3e9da069cf5d224025160628ab3911ba00 Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 
5 Jul 2022 22:28:18 +0530 Subject: Fix math tests to allow ftz in relaxed mode. (#1371) * Fix math tests to allow ftz in relaxed mode. In recent spec clarification, it is agreed that ftz is a valid optimization in case of cl-fast-math-relaxed and doesn't require cl-denorms-are-zero to be passed explicitly to enforce ftz behavior for implementations that already support this. GitHub Spec Issue OpenCL-Docs#579 GitHub Spec Issue OpenCL-Docs#597 GitHub CTS Issue OpenCL-CTS#1267 --- test_conformance/math_brute_force/binary_double.cpp | 4 +++- test_conformance/math_brute_force/binary_float.cpp | 4 ++-- test_conformance/math_brute_force/binary_i_double.cpp | 6 +++++- test_conformance/math_brute_force/binary_i_float.cpp | 7 +++++-- test_conformance/math_brute_force/binary_operator_double.cpp | 3 ++- test_conformance/math_brute_force/binary_operator_float.cpp | 6 +++--- test_conformance/math_brute_force/binary_two_results_i_double.cpp | 2 +- test_conformance/math_brute_force/binary_two_results_i_float.cpp | 2 +- test_conformance/math_brute_force/i_unary_double.cpp | 2 +- test_conformance/math_brute_force/i_unary_float.cpp | 2 +- test_conformance/math_brute_force/macro_binary_double.cpp | 8 ++++++-- test_conformance/math_brute_force/macro_binary_float.cpp | 8 ++++++-- test_conformance/math_brute_force/macro_unary_double.cpp | 8 ++++++-- test_conformance/math_brute_force/macro_unary_float.cpp | 8 ++++++-- test_conformance/math_brute_force/ternary_double.cpp | 2 +- test_conformance/math_brute_force/ternary_float.cpp | 2 +- test_conformance/math_brute_force/unary_double.cpp | 3 ++- test_conformance/math_brute_force/unary_float.cpp | 2 +- test_conformance/math_brute_force/unary_two_results_double.cpp | 2 +- test_conformance/math_brute_force/unary_two_results_float.cpp | 4 ++-- test_conformance/math_brute_force/unary_two_results_i_double.cpp | 2 +- test_conformance/math_brute_force/unary_two_results_i_float.cpp | 2 +- test_conformance/math_brute_force/unary_u_double.cpp | 2 +- 
test_conformance/math_brute_force/unary_u_float.cpp | 2 +- 24 files changed, 60 insertions(+), 33 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index a2b7d28b..ec8eb300 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -297,6 +297,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_int error; const char *name = job->f->name; @@ -481,7 +482,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float err = Bruteforce_Ulp_Error_Double(test, correct); int fail = !(fabsf(err) <= ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, ulps)) @@ -680,6 +681,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); test_info.skipNanInf = 0; diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 97712ee8..a706f772 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -461,7 +461,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { // Calculate the correctly rounded reference result memset(&oldMode, 0, sizeof(oldMode)); - if (ftz) ForceFTZ(&oldMode); + if (ftz || relaxedMode) ForceFTZ(&oldMode); // Set the rounding mode to match the device if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat); @@ -546,7 +546,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float err = Ulp_Error(test, correct); int fail = 
!(fabsf(err) <= ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, ulps)) diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index f15c21ed..23a729e0 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -164,6 +164,8 @@ struct TestInfo cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. // no special values }; @@ -300,6 +302,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_int error; const char *name = job->f->name; @@ -482,7 +485,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float err = Bruteforce_Ulp_Error_Double(test, correct); int fail = !(fabsf(err) <= ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, ulps)) @@ -601,6 +604,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ulps = f->double_ulps; test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 9e27b007..0cf7494f 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -162,7 +162,8 @@ struct TestInfo cl_uint scale; // stride between individual test values float ulps; // max_allowed ulps int ftz; // non-zero if 
running in flush to zero mode - + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. // no special values }; @@ -291,6 +292,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; float ulps = job->ulps; MTdata d = tinfo->d; cl_int error; @@ -473,7 +475,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float err = Ulp_Error(test, correct); int fail = !(fabsf(err) <= ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, ulps)) @@ -595,6 +597,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index c407fdaa..f90a4d64 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -294,6 +294,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float ulps = job->ulps; dptr func = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_int error; const char *name = job->f->name; @@ -476,7 +477,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float err = Bruteforce_Ulp_Error_Double(test, correct); int fail = !(fabsf(err) <= ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, ulps)) diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp 
b/test_conformance/math_brute_force/binary_operator_float.cpp index 7fbb07c2..535d7209 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -456,7 +456,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // Calculate the correctly rounded reference result FPU_mode_type oldMode; memset(&oldMode, 0, sizeof(oldMode)); - if (ftz) ForceFTZ(&oldMode); + if (ftz || relaxedMode) ForceFTZ(&oldMode); // Set the rounding mode to match the device oldRoundMode = kRoundToNearestEven; @@ -484,7 +484,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gIsInRTZMode) (void)set_round(oldRoundMode, kfloat); - if (ftz) RestoreFPState(&oldMode); + if (ftz || relaxedMode) RestoreFPState(&oldMode); // Read the data back -- no need to wait for the first N-1 buffers but wait // for the last buffer. This is an in order queue. @@ -541,7 +541,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ((!(fabsf(err) <= ulps)) && (!(fabsf(errB) <= ulps))); if (fabsf(errB) < fabsf(err)) err = errB; - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, ulps)) diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index 43dc1d30..be7064e4 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -379,7 +379,7 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) if (iptrUndefined) iErr = 0; int fail = !(fabsf(err) <= f->double_ulps && iErr == 0); - if (ftz && fail) + if ((ftz || relaxedMode) && fail) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, f->double_ulps)) diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp 
b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 83ceeaab..901c8598 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -379,7 +379,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) if (iptrUndefined) iErr = 0; int fail = !(fabsf(err) <= float_ulps && iErr == 0); - if (ftz && fail) + if ((ftz || relaxedMode) && fail) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, float_ulps)) diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index d09e14c1..f07dd78d 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -248,7 +248,7 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) // If we aren't getting the correctly rounded result if (t[j] != q[j]) { - if (ftz && IsDoubleSubnormal(s[j])) + if ((ftz || relaxedMode) && IsDoubleSubnormal(s[j])) { unsigned int correct0 = f->dfunc.i_f(0.0); unsigned int correct1 = f->dfunc.i_f(-0.0); diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index 89b566d9..c38bdcf9 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -245,7 +245,7 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) // If we aren't getting the correctly rounded result if (t[j] != q[j]) { - if (ftz && IsFloatSubnormal(s[j])) + if ((ftz || relaxedMode) && IsFloatSubnormal(s[j])) { unsigned int correct0 = f->func.i_f(0.0); unsigned int correct1 = f->func.i_f(-0.0); diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index d3e8071f..bb036a24 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp 
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -157,6 +157,8 @@ struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. }; // A table of more difficult cases to get right @@ -282,6 +284,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ThreadInfo *tinfo = &(job->tinfo[thread_id]); dptr dfunc = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_int error; const char *name = job->f->name; @@ -455,7 +458,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gMinVectorSizeIndex == 0 && t[j] != q[j]) { // If we aren't getting the correctly rounded result - if (ftz) + if (ftz || relaxedMode) { if (IsDoubleSubnormal(s[j])) { @@ -503,7 +506,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // If we aren't getting the correctly rounded result if (-t[j] != q[j]) { - if (ftz) + if (ftz || relaxedMode) { if (IsDoubleSubnormal(s[j])) { @@ -607,6 +610,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 6c7c8c05..f8cfc9b7 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -155,6 +155,8 @@ struct TestInfo cl_uint step; // step between each chunk and the next. 
cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. }; // A table of more difficult cases to get right @@ -272,6 +274,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_int error; const char *name = job->f->name; @@ -445,7 +448,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gMinVectorSizeIndex == 0 && t[j] != q[j]) { - if (ftz) + if (ftz || relaxedMode) { if (IsFloatSubnormal(s[j])) { @@ -492,7 +495,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // If we aren't getting the correctly rounded result if (-t[j] != q[j]) { - if (ftz) + if (ftz || relaxedMode) { if (IsFloatSubnormal(s[j])) { @@ -596,6 +599,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 7f3521c6..0e71f8a0 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -149,6 +149,8 @@ struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. 
}; cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) @@ -161,6 +163,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ThreadInfo *tinfo = &(job->tinfo[thread_id]); dptr dfunc = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; cl_int error; const char *name = job->f->name; @@ -286,7 +289,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gMinVectorSizeIndex == 0 && t[j] != q[j]) { // If we aren't getting the correctly rounded result - if (ftz) + if (ftz || relaxedMode) { if (IsDoubleSubnormal(s[j])) { @@ -311,7 +314,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // If we aren't getting the correctly rounded result if (-t[j] != q[j]) { - if (ftz) + if (ftz || relaxedMode) { if (IsDoubleSubnormal(s[j])) { @@ -392,6 +395,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 0cd54de4..3b53bdb0 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -148,6 +148,8 @@ struct TestInfo cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. 
}; cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) @@ -160,6 +162,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; cl_int error = CL_SUCCESS; cl_int ret = CL_SUCCESS; const char *name = job->f->name; @@ -290,7 +293,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gMinVectorSizeIndex == 0 && t[j] != q[j]) { // If we aren't getting the correctly rounded result - if (ftz) + if (ftz || relaxedMode) { if (IsFloatSubnormal(s[j])) { @@ -316,7 +319,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) // If we aren't getting the correctly rounded result if (-t[j] != q[j]) { - if (ftz) + if (ftz || relaxedMode) { if (IsFloatSubnormal(s[j])) { @@ -406,6 +409,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) test_info.f = f; test_info.ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; // cl_kernels aren't thread safe, so we make one for each vector size for // every thread diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index 8af136ac..a3db3353 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -391,7 +391,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, float err = Bruteforce_Ulp_Error_Double(test, correct); int fail = !(fabsf(err) <= f->double_ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsDoubleSubnormal(correct)) diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index c69083ad..fdcb48c4 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -443,7 
+443,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) err = Ulp_Error(test, correct); fail = !(fabsf(err) <= float_ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { float correct2, err2; diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 2d455047..3430fe34 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -172,6 +172,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) dptr func = job->f->dfunc; cl_int error; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; Force64BitFPUPrecision(); @@ -305,7 +306,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (fail) { - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, ulps)) diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 83d27b0b..02a5c2cf 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -435,7 +435,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (fail) { - if (ftz) + if (ftz || relaxedMode) { typedef int (*CheckForSubnormal)( double, float); // If we are in fast relaxed math, diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index 8757fbc4..5556a080 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -291,7 +291,7 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) float err2 = Bruteforce_Ulp_Error_Double(test2, correct2); int fail = !(fabsf(err) <= f->double_ulps && fabsf(err2) <= f->double_ulps); - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if 
(IsDoubleResultSubnormal(correct, f->double_ulps)) diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index a54bd024..c95b10d3 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -258,7 +258,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) { // Calculate the correctly rounded reference result memset(&oldMode, 0, sizeof(oldMode)); - if (ftz) ForceFTZ(&oldMode); + if (ftz || relaxedMode) ForceFTZ(&oldMode); // Set the rounding mode to match the device if (gIsInRTZMode) @@ -385,7 +385,7 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) int fail = !(fabsf(err) <= float_ulps && fabsf(err2) <= float_ulps); - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if ((*isFloatResultSubnormalPtr)(correct, float_ulps)) diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index 9ed77dce..c976061c 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -294,7 +294,7 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) cl_long iErr = (long long)q2[j] - (long long)correct2; int fail = !(fabsf(err) <= f->double_ulps && abs_cl_long(iErr) <= maxiError); - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, f->double_ulps)) diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index d048220b..7a3cd981 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -297,7 +297,7 @@ int TestFunc_FloatI_Float(const Func 
*f, MTdata d, bool relaxedMode) cl_long iErr = (int64_t)q2[j] - (int64_t)correct2; int fail = !(fabsf(err) <= float_ulps && abs_cl_long(iErr) <= maxiError); - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, float_ulps)) diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index 9478d0bc..621ee6bb 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -249,7 +249,7 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) if (fail) { - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if (IsDoubleResultSubnormal(correct, diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index 848a9bac..0eae2e54 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -253,7 +253,7 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) if (fail) { - if (ftz) + if (ftz || relaxedMode) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, float_ulps)) -- cgit v1.2.3 From a37884fe4461362c39a444f39402baebac3e713b Mon Sep 17 00:00:00 2001 From: Callum Fare Date: Tue, 19 Jul 2022 17:43:36 +0100 Subject: Update cl_khr_extended_async_copies tests to the latest extension version (#1426) * Update cl_khr_extended_async_copies tests to the latest version of the extension Update the 2D and 3D extended async copies tests. Previously they were based on an older provisional version of the extension. Also update the variable names to only use 'stride' to refer to the actual stride values. Previously the tests used 'stride' to refer to the end of one line or plane and the start of the next. This is not the commonly understood meaning. 
* Address cl_khr_extended_async_copies PR feedback * Remove unnecessary parenthesis in kernel code * Make variables `const` and rearrange so that we can reuse variables, rather than repeating expressions. * Add in missing vector size of 3 for 2D tests * Use C++ String literals for kernel code Rather than C strings use C++11 string literals to define the kernel code in the extended async-copy tests. Doing this makes the kernel code more readable. Co-authored-by: Ewan Crawford --- test_conformance/basic/test_async_copy2D.cpp | 238 ++++++++++--------- test_conformance/basic/test_async_copy3D.cpp | 331 +++++++++++++-------------- 2 files changed, 289 insertions(+), 280 deletions(-) diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp index fafcac83..54633a31 100644 --- a/test_conformance/basic/test_async_copy2D.cpp +++ b/test_conformance/basic/test_async_copy2D.cpp @@ -25,77 +25,81 @@ #include "../../test_common/harness/conversions.h" #include "procs.h" -static const char *async_global_to_local_kernel2D = - "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n" - "%s\n" // optional pragma string - "__kernel void test_fn( const __global %s *src, __global %s *dst, __local " - "%s *localBuffer, int numElementsPerLine, int lineCopiesPerWorkgroup, int " - "lineCopiesPerWorkItem, int srcStride, int dstStride )\n" - "{\n" - " int i, j;\n" - // Zero the local storage first - " for(i=0; i max_local_workgroup_size[0]) max_workgroup_size = max_local_workgroup_size[0]; - size_t numElementsPerLine = 10; - size_t lineCopiesPerWorkItem = 13; + const size_t numElementsPerLine = 10; + const cl_int dstStride = numElementsPerLine + dstMargin; + const cl_int srcStride = numElementsPerLine + srcMargin; + elementSize = get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize); - size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem * elementSize - * (numElementsPerLine + (localIsDst ? 
dstStride : srcStride)); + + const size_t lineCopiesPerWorkItem = 13; + const size_t localStorageSpacePerWorkitem = lineCopiesPerWorkItem + * elementSize * (localIsDst ? dstStride : srcStride); + size_t maxLocalWorkgroupSize = (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem); @@ -199,34 +208,39 @@ int test_copy2D(cl_device_id deviceID, cl_context context, if (maxLocalWorkgroupSize > max_workgroup_size) localWorkgroupSize = max_workgroup_size; - size_t maxTotalLinesIn = (max_alloc_size / elementSize + srcStride) - / (numElementsPerLine + srcStride); - size_t maxTotalLinesOut = (max_alloc_size / elementSize + dstStride) - / (numElementsPerLine + dstStride); - size_t maxTotalLines = std::min(maxTotalLinesIn, maxTotalLinesOut); - size_t maxLocalWorkgroups = + + const size_t maxTotalLinesIn = + (max_alloc_size / elementSize + srcMargin) / srcStride; + const size_t maxTotalLinesOut = + (max_alloc_size / elementSize + dstMargin) / dstStride; + const size_t maxTotalLines = std::min(maxTotalLinesIn, maxTotalLinesOut); + const size_t maxLocalWorkgroups = maxTotalLines / (localWorkgroupSize * lineCopiesPerWorkItem); - size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem - - (localIsDst ? dstStride : srcStride); - size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups); - size_t totalLines = + const size_t localBufferSize = + localWorkgroupSize * localStorageSpacePerWorkitem + - (localIsDst ? 
dstMargin : srcMargin); + const size_t numberOfLocalWorkgroups = + std::min(1111, (int)maxLocalWorkgroups); + const size_t totalLines = numberOfLocalWorkgroups * localWorkgroupSize * lineCopiesPerWorkItem; - size_t inBufferSize = elementSize - * (totalLines * numElementsPerLine + (totalLines - 1) * srcStride); - size_t outBufferSize = elementSize - * (totalLines * numElementsPerLine + (totalLines - 1) * dstStride); - size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; + const size_t inBufferSize = elementSize + * (totalLines * numElementsPerLine + (totalLines - 1) * srcMargin); + const size_t outBufferSize = elementSize + * (totalLines * numElementsPerLine + (totalLines - 1) * dstMargin); + const size_t globalWorkgroupSize = + numberOfLocalWorkgroups * localWorkgroupSize; inBuffer = (void *)malloc(inBufferSize); outBuffer = (void *)malloc(outBufferSize); outBufferCopy = (void *)malloc(outBufferSize); - cl_int lineCopiesPerWorkItemInt, numElementsPerLineInt, - lineCopiesPerWorkgroup; - lineCopiesPerWorkItemInt = (int)lineCopiesPerWorkItem; - numElementsPerLineInt = (int)numElementsPerLine; - lineCopiesPerWorkgroup = (int)(lineCopiesPerWorkItem * localWorkgroupSize); + const cl_int lineCopiesPerWorkItemInt = + static_cast(lineCopiesPerWorkItem); + const cl_int numElementsPerLineInt = + static_cast(numElementsPerLine); + const cl_int lineCopiesPerWorkgroup = + static_cast(lineCopiesPerWorkItem * localWorkgroupSize); log_info( "Global: %d, local %d, local buffer %db, global in buffer %db, " @@ -296,8 +310,8 @@ int test_copy2D(cl_device_id deviceID, cl_context context, for (int j = 0; j < (int)numElementsPerLine * elementSize; j += elementSize) { - int inIdx = i * (numElementsPerLine + srcStride) + j; - int outIdx = i * (numElementsPerLine + dstStride) + j; + int inIdx = i * srcStride + j; + int outIdx = i * dstStride + j; if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx, typeSize) != 0) @@ -332,11 +346,10 @@ int 
test_copy2D(cl_device_id deviceID, cl_context context, if (i < (int)(globalWorkgroupSize * lineCopiesPerWorkItem - 1) * elementSize) { - int outIdx = i * (numElementsPerLine + dstStride) - + numElementsPerLine * elementSize; + int outIdx = i * dstStride + numElementsPerLine * elementSize; if (memcmp(((char *)outBuffer) + outIdx, ((char *)outBufferCopy) + outIdx, - dstStride * elementSize) + dstMargin * elementSize) != 0) { if (failuresPrinted == 0) @@ -373,9 +386,12 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context, kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + // The margins below represent the number of elements between the end of + // one line and the start of the next. The strides are equivalent to the + // length of the line plus the chosen margin. unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int smallTypesStrideSizes[] = { 0, 10, 100 }; - unsigned int size, typeIndex, srcStride, dstStride; + unsigned int smallTypesMarginSizes[] = { 0, 10, 100 }; + unsigned int size, typeIndex, srcMargin, dstMargin; int errors = 0; @@ -401,19 +417,19 @@ int test_copy2D_all_types(cl_device_id deviceID, cl_context context, if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size] <= 2) // small type { - for (srcStride = 0; srcStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - srcStride++) + for (srcMargin = 0; srcMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + srcMargin++) { - for (dstStride = 0; - dstStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - dstStride++) + for (dstMargin = 0; + dstMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + dstMargin++) { if (test_copy2D(deviceID, context, queue, kernelCode, vecType[typeIndex], vecSizes[size], - smallTypesStrideSizes[srcStride], - smallTypesStrideSizes[dstStride], + smallTypesMarginSizes[srcMargin], + 
smallTypesMarginSizes[dstMargin], localIsDst)) { errors++; diff --git a/test_conformance/basic/test_async_copy3D.cpp b/test_conformance/basic/test_async_copy3D.cpp index 2b184ee5..5eb41ebc 100644 --- a/test_conformance/basic/test_async_copy3D.cpp +++ b/test_conformance/basic/test_async_copy3D.cpp @@ -25,96 +25,95 @@ #include "../../test_common/harness/conversions.h" #include "procs.h" -static const char *async_global_to_local_kernel3D = - "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n" - "%s\n" // optional pragma string - "__kernel void test_fn( const __global %s *src, __global %s *dst, __local " - "%s *localBuffer, int numElementsPerLine, int numLines, int " - "planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, " - "int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n" - "{\n" - " int i, j, k;\n" - // Zero the local storage first - " for(i=0; i max_local_workgroup_size[0]) max_workgroup_size = max_local_workgroup_size[0]; - size_t numElementsPerLine = 10; - size_t numLines = 13; - size_t planesCopiesPerWorkItem = 2; + const size_t numElementsPerLine = 10; + const cl_int dstLineStride = numElementsPerLine + dstLineMargin; + const cl_int srcLineStride = numElementsPerLine + srcLineMargin; + + const size_t numLines = 13; + const cl_int dstPlaneStride = (numLines * dstLineStride) + dstPlaneMargin; + const cl_int srcPlaneStride = (numLines * srcLineStride) + srcPlaneMargin; + elementSize = get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize); - size_t localStorageSpacePerWorkitem = elementSize - * (planesCopiesPerWorkItem - * (numLines * numElementsPerLine - + numLines * (localIsDst ? dstLineStride : srcLineStride) - + (localIsDst ? dstPlaneStride : srcPlaneStride))); + const size_t planesCopiesPerWorkItem = 2; + const size_t localStorageSpacePerWorkitem = elementSize + * planesCopiesPerWorkItem + * (localIsDst ? 
dstPlaneStride : srcPlaneStride); size_t maxLocalWorkgroupSize = (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem); @@ -224,42 +227,41 @@ int test_copy3D(cl_device_id deviceID, cl_context context, if (maxLocalWorkgroupSize > max_workgroup_size) localWorkgroupSize = max_workgroup_size; - size_t maxTotalPlanesIn = ((max_alloc_size / elementSize) + srcPlaneStride) - / ((numLines * numElementsPerLine + numLines * srcLineStride) - + srcPlaneStride); - size_t maxTotalPlanesOut = ((max_alloc_size / elementSize) + dstPlaneStride) - / ((numLines * numElementsPerLine + numLines * dstLineStride) - + dstPlaneStride); - size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut); - size_t maxLocalWorkgroups = + const size_t maxTotalPlanesIn = + ((max_alloc_size / elementSize) + srcPlaneMargin) / srcPlaneStride; + const size_t maxTotalPlanesOut = + ((max_alloc_size / elementSize) + dstPlaneMargin) / dstPlaneStride; + const size_t maxTotalPlanes = std::min(maxTotalPlanesIn, maxTotalPlanesOut); + const size_t maxLocalWorkgroups = maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem); - size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem - - (localIsDst ? dstPlaneStride : srcPlaneStride); - size_t numberOfLocalWorkgroups = std::min(1111, (int)maxLocalWorkgroups); - size_t totalPlanes = + const size_t localBufferSize = + localWorkgroupSize * localStorageSpacePerWorkitem + - (localIsDst ? 
dstPlaneMargin : srcPlaneMargin); + const size_t numberOfLocalWorkgroups = + std::min(1111, (int)maxLocalWorkgroups); + const size_t totalPlanes = numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem; - size_t inBufferSize = elementSize - * (totalPlanes - * (numLines * numElementsPerLine + numLines * srcLineStride) - + (totalPlanes - 1) * srcPlaneStride); - size_t outBufferSize = elementSize - * (totalPlanes - * (numLines * numElementsPerLine + numLines * dstLineStride) - + (totalPlanes - 1) * dstPlaneStride); - size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize; + const size_t inBufferSize = elementSize + * (totalPlanes * numLines * srcLineStride + + (totalPlanes - 1) * srcPlaneMargin); + const size_t outBufferSize = elementSize + * (totalPlanes * numLines * dstLineStride + + (totalPlanes - 1) * dstPlaneMargin); + const size_t globalWorkgroupSize = + numberOfLocalWorkgroups * localWorkgroupSize; inBuffer = (void *)malloc(inBufferSize); outBuffer = (void *)malloc(outBufferSize); outBufferCopy = (void *)malloc(outBufferSize); - cl_int planesCopiesPerWorkItemInt, numElementsPerLineInt, numLinesInt, - planesCopiesPerWorkgroup; - planesCopiesPerWorkItemInt = (int)planesCopiesPerWorkItem; - numElementsPerLineInt = (int)numElementsPerLine; - numLinesInt = (int)numLines; - planesCopiesPerWorkgroup = - (int)(planesCopiesPerWorkItem * localWorkgroupSize); + const cl_int planesCopiesPerWorkItemInt = + static_cast(planesCopiesPerWorkItem); + const cl_int numElementsPerLineInt = + static_cast(numElementsPerLine); + const cl_int numLinesInt = static_cast(numLines); + const cl_int planesCopiesPerWorkgroup = + static_cast(planesCopiesPerWorkItem * localWorkgroupSize); log_info("Global: %d, local %d, local buffer %db, global in buffer %db, " "global out buffer %db, each work group will copy %d planes and " @@ -336,14 +338,8 @@ int test_copy3D(cl_device_id deviceID, cl_context context, for (int k = 0; k < (int)numElementsPerLine * 
elementSize; k += elementSize) { - int inIdx = i - * (numLines * numElementsPerLine - + numLines * srcLineStride + srcPlaneStride) - + j * (numElementsPerLine + srcLineStride) + k; - int outIdx = i - * (numLines * numElementsPerLine - + numLines * dstLineStride + dstPlaneStride) - + j * (numElementsPerLine + dstLineStride) + k; + int inIdx = i * srcPlaneStride + j * srcLineStride + k; + int outIdx = i * dstPlaneStride + j * dstLineStride + k; if (memcmp(((char *)inBuffer) + inIdx, ((char *)outBuffer) + outIdx, typeSize) != 0) @@ -378,14 +374,11 @@ int test_copy3D(cl_device_id deviceID, cl_context context, } if (j < (int)numLines * elementSize) { - int outIdx = i - * (numLines * numElementsPerLine - + numLines * dstLineStride + dstPlaneStride) - + j * (numElementsPerLine + dstLineStride) + int outIdx = i * dstPlaneStride + j * dstLineStride + numElementsPerLine * elementSize; if (memcmp(((char *)outBuffer) + outIdx, ((char *)outBufferCopy) + outIdx, - dstLineStride * elementSize) + dstLineMargin * elementSize) != 0) { if (failuresPrinted == 0) @@ -409,14 +402,11 @@ int test_copy3D(cl_device_id deviceID, cl_context context, if (i < (int)(globalWorkgroupSize * planesCopiesPerWorkItem - 1) * elementSize) { - int outIdx = i - * (numLines * numElementsPerLine + numLines * dstLineStride - + dstPlaneStride) - + (numLines * elementSize) * (numElementsPerLine) - + (numLines * elementSize) * (dstLineStride); + int outIdx = + i * dstPlaneStride + numLines * dstLineStride * elementSize; if (memcmp(((char *)outBuffer) + outIdx, ((char *)outBufferCopy) + outIdx, - dstPlaneStride * elementSize) + dstPlaneMargin * elementSize) != 0) { if (failuresPrinted == 0) @@ -453,10 +443,13 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context, kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble, kNumExplicitTypes }; + // The margins below represent the number of elements between the end of + // one line or plane and the start of the next. 
The strides are equivalent + // to the size of the line or plane plus the chosen margin. unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; - unsigned int smallTypesStrideSizes[] = { 0, 10, 100 }; - unsigned int size, typeIndex, srcLineStride, dstLineStride, srcPlaneStride, - dstPlaneStride; + unsigned int smallTypesMarginSizes[] = { 0, 10, 100 }; + unsigned int size, typeIndex, srcLineMargin, dstLineMargin, srcPlaneMargin, + dstPlaneMargin; int errors = 0; @@ -482,33 +475,33 @@ int test_copy3D_all_types(cl_device_id deviceID, cl_context context, if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size] <= 2) // small type { - for (srcLineStride = 0; - srcLineStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - srcLineStride++) + for (srcLineMargin = 0; + srcLineMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + srcLineMargin++) { - for (dstLineStride = 0; - dstLineStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - dstLineStride++) + for (dstLineMargin = 0; + dstLineMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + dstLineMargin++) { - for (srcPlaneStride = 0; - srcPlaneStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - srcPlaneStride++) + for (srcPlaneMargin = 0; + srcPlaneMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + srcPlaneMargin++) { - for (dstPlaneStride = 0; - dstPlaneStride < sizeof(smallTypesStrideSizes) - / sizeof(smallTypesStrideSizes[0]); - dstPlaneStride++) + for (dstPlaneMargin = 0; + dstPlaneMargin < sizeof(smallTypesMarginSizes) + / sizeof(smallTypesMarginSizes[0]); + dstPlaneMargin++) { if (test_copy3D( deviceID, context, queue, kernelCode, vecType[typeIndex], vecSizes[size], - smallTypesStrideSizes[srcLineStride], - smallTypesStrideSizes[dstLineStride], - smallTypesStrideSizes[srcPlaneStride], - smallTypesStrideSizes[dstPlaneStride], + 
smallTypesMarginSizes[srcLineMargin], + smallTypesMarginSizes[dstLineMargin], + smallTypesMarginSizes[srcPlaneMargin], + smallTypesMarginSizes[dstPlaneMargin], localIsDst)) { errors++; -- cgit v1.2.3 From 2cf24e63b7f1c2817a2d37b02612185c59f05faf Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 21 Jul 2022 18:54:20 +0100 Subject: Fix function name in error messages (#1450) Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 3 ++- test_conformance/math_brute_force/binary_float.cpp | 3 ++- test_conformance/math_brute_force/binary_i_double.cpp | 3 ++- test_conformance/math_brute_force/binary_i_float.cpp | 3 ++- test_conformance/math_brute_force/binary_operator_double.cpp | 3 ++- test_conformance/math_brute_force/binary_operator_float.cpp | 3 ++- test_conformance/math_brute_force/macro_binary_double.cpp | 3 ++- test_conformance/math_brute_force/macro_binary_float.cpp | 3 ++- test_conformance/math_brute_force/macro_unary_double.cpp | 3 ++- test_conformance/math_brute_force/macro_unary_float.cpp | 3 ++- test_conformance/math_brute_force/unary_double.cpp | 3 ++- test_conformance/math_brute_force/unary_float.cpp | 3 ++- 12 files changed, 24 insertions(+), 12 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index ec8eb300..e987774a 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -400,7 +400,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! 
err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index a706f772..e8baccdc 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -404,7 +404,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 23a729e0..3c4ef4a4 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -403,7 +403,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 0cf7494f..dcda5f82 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -393,7 +393,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! 
err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index f90a4d64..4661f240 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -395,7 +395,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 535d7209..7a239963 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -407,7 +407,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index bb036a24..a6f65ac4 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -383,7 +383,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! 
err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index f8cfc9b7..3fe02629 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -375,7 +375,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 0e71f8a0..c44ebe21 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -221,7 +221,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); return error; } diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index 3b53bdb0..d9d79094 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -223,7 +223,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! 
err: %d\n", + error); return error; } diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 3430fe34..3b91b4cd 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -228,7 +228,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); return error; } diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 02a5c2cf..e5576e7e 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -256,7 +256,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); return error; } -- cgit v1.2.3 From 0a5a8f90c96d6456f7c163af9c183fda0ed7af0d Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 28 Jul 2022 13:33:16 +0100 Subject: Use clProgramWrapper in math_brute_force (#1451) Simplify code by avoiding manual resource management. This allows removing clReleaseProgram from `MakeKernels` to reduce behavioral differences between `MakeKernels` and `MakeKernel`. Original patch by Marco Antognini. 
Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 9 +++++---- test_conformance/math_brute_force/binary_float.cpp | 9 +++++---- test_conformance/math_brute_force/binary_i_double.cpp | 9 +++++---- test_conformance/math_brute_force/binary_i_float.cpp | 9 +++++---- test_conformance/math_brute_force/binary_operator_double.cpp | 9 +++++---- test_conformance/math_brute_force/binary_operator_float.cpp | 9 +++++---- .../math_brute_force/binary_two_results_i_double.cpp | 8 ++++---- test_conformance/math_brute_force/binary_two_results_i_float.cpp | 8 ++++---- test_conformance/math_brute_force/common.h | 4 ++++ test_conformance/math_brute_force/i_unary_double.cpp | 8 ++++---- test_conformance/math_brute_force/i_unary_float.cpp | 8 ++++---- test_conformance/math_brute_force/macro_binary_double.cpp | 9 +++++---- test_conformance/math_brute_force/macro_binary_float.cpp | 9 +++++---- test_conformance/math_brute_force/macro_unary_double.cpp | 9 +++++---- test_conformance/math_brute_force/macro_unary_float.cpp | 9 +++++---- test_conformance/math_brute_force/mad_double.cpp | 8 ++++---- test_conformance/math_brute_force/mad_float.cpp | 8 ++++---- test_conformance/math_brute_force/main.cpp | 1 - test_conformance/math_brute_force/ternary_double.cpp | 8 ++++---- test_conformance/math_brute_force/ternary_float.cpp | 8 ++++---- test_conformance/math_brute_force/unary_double.cpp | 9 +++++---- test_conformance/math_brute_force/unary_float.cpp | 9 +++++---- test_conformance/math_brute_force/unary_two_results_double.cpp | 8 ++++---- test_conformance/math_brute_force/unary_two_results_float.cpp | 8 ++++---- test_conformance/math_brute_force/unary_two_results_i_double.cpp | 8 ++++---- test_conformance/math_brute_force/unary_two_results_i_float.cpp | 8 ++++---- test_conformance/math_brute_force/unary_u_double.cpp | 8 ++++---- test_conformance/math_brute_force/unary_u_float.cpp | 8 ++++---- 28 files changed, 120 
insertions(+), 105 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index e987774a..0869acad 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -117,7 +117,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -127,7 +127,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -150,7 +150,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -791,7 +793,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index e8baccdc..aea86ca7 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -115,7 +115,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -125,7 +125,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -148,7 +148,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -948,7 +950,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 3c4ef4a4..eb94b5f7 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -116,7 +116,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -126,7 +126,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -149,7 +149,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -713,7 +715,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index dcda5f82..f6e4d2fc 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -114,7 +114,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -124,7 +124,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -147,7 +147,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -706,7 +708,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 4661f240..f94b8aa4 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -116,7 +116,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -126,7 +126,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->operator_symbol, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -149,7 +149,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -758,7 +760,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 7a239963..64a4c4af 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -114,7 +114,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *operator_symbol; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -124,7 +124,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->operator_symbol, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -147,7 +147,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. 
+ Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -885,7 +887,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index be7064e4..132ff593 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -121,7 +122,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -131,7 +132,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } struct ComputeReferenceInfoD @@ -172,7 +173,7 @@ cl_int ReferenceD(cl_uint jid, cl_uint tid, void *userInfo) int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int64_t maxError2 = 0; @@ -577,7 +578,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 901c8598..017ad125 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -119,7 +120,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -129,7 +130,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } struct ComputeReferenceInfoF @@ -171,7 +172,7 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -562,7 +563,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 3eafb6de..5c1f8e6d 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -16,6 +16,7 @@ #ifndef COMMON_H #define COMMON_H +#include "harness/typeWrappers.h" #include "utility.h" #include @@ -24,4 +25,7 @@ // Array of thread-specific kernels for each vector size. using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; +// Array of programs for each vector size. +using Programs = std::array; + #endif /* COMMON_H */ diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index f07dd78d..1900afe4 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. 
// +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -106,7 +107,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. }; @@ -116,7 +117,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -124,7 +125,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; int ftz = f->ftz || gForceFTZ; uint64_t step = getTestStep(sizeof(cl_double), BUFFER_SIZE); @@ -299,7 +300,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index c38bdcf9..baf0ab67 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -104,7 +105,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -114,7 +115,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -122,7 +123,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); uint64_t step = getTestStep(sizeof(float), BUFFER_SIZE); @@ -295,7 +296,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index a6f65ac4..8f723a08 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -115,7 +115,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -125,7 +125,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -143,7 +143,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -703,7 +705,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 3fe02629..bdcb0925 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -113,7 +113,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -123,7 +123,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -141,7 +141,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -692,7 +694,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index c44ebe21..0d086614 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -109,7 +109,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -119,7 +119,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -135,7 +135,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -476,7 +478,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index d9d79094..ea485b05 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -108,7 +108,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -118,7 +118,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -134,7 +134,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -490,7 +492,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp index 8e88f9f6..77428d06 100644 --- a/test_conformance/math_brute_force/mad_double.cpp +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -119,7 +120,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -129,7 +130,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -137,7 +138,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; double maxErrorVal = 0.0f; @@ -298,7 +299,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp index 0552ba4b..9a7730f1 100644 --- a/test_conformance/math_brute_force/mad_float.cpp +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -117,7 +118,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -127,7 +128,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -138,7 +139,7 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; float maxErrorVal = 0.0f; @@ -297,7 +298,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 1a6e0c4e..d1d146a1 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -1092,7 +1092,6 @@ int MakeKernels(const char **c, cl_uint count, const char *name, clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, NULL); vlog_error("Log: %s\n", buffer); - clReleaseProgram(*p); return error; } } diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index a3db3353..c4a9a1c4 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -122,7 +123,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -132,7 +133,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } // A table of more difficult cases to get right @@ -213,7 +214,7 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int ftz = f->ftz || gForceFTZ; @@ -737,7 +738,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index fdcb48c4..36d957ce 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -120,7 +121,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -130,7 +131,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } // A table of more difficult cases to get right @@ -223,7 +224,7 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -872,7 +873,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 3b91b4cd..3351ea35 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -109,7 +109,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -119,7 +119,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -137,7 +137,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -514,7 +516,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index e5576e7e..37a63732 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -107,7 +107,7 @@ struct BuildKernelInfo cl_uint offset; // the first vector size to build cl_uint kernel_count; KernelMatrix &kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -117,7 +117,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), info->programs + i, + info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } @@ -135,7 +135,9 @@ struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes + + // Programs for various vector sizes. + Programs programs; // Thread-specific kernels for each vector size: // k[vector_size][thread_id] @@ -691,7 +693,6 @@ exit: // Release for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) { - clReleaseProgram(test_info.programs[i]); for (auto &kernel : test_info.k[i]) { clReleaseKernel(kernel); diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index 5556a080..5887f192 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -113,7 +114,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -123,7 +124,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -131,7 +132,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError0 = 0.0f; float maxError1 = 0.0f; @@ -443,7 +444,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index c95b10d3..fb8d5535 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -111,7 +112,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -121,7 +122,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -129,7 +130,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError0 = 0.0f; float maxError1 = 0.0f; @@ -575,7 +576,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index c976061c..6f2de049 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -114,7 +115,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -124,7 +125,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } cl_ulong abs_cl_long(cl_long i) @@ -138,7 +139,7 @@ cl_ulong abs_cl_long(cl_long i) int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int64_t maxError2 = 0; @@ -415,7 +416,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index 7a3cd981..529da8dc 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -112,7 +113,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -122,7 +123,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } cl_ulong abs_cl_long(cl_long i) @@ -136,7 +137,7 @@ cl_ulong abs_cl_long(cl_long i) int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int64_t maxError2 = 0; @@ -413,7 +414,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index 621ee6bb..8113b955 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -108,7 +109,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -118,7 +119,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } cl_ulong random64(MTdata d) @@ -131,7 +132,7 @@ cl_ulong random64(MTdata d) int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int ftz = f->ftz || gForceFTZ; @@ -311,7 +312,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index 0eae2e54..ee077c80 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -14,6 +14,7 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" @@ -105,7 +106,7 @@ struct BuildKernelInfo { cl_uint offset; // the first vector size to build cl_kernel *kernels; - cl_program *programs; + Programs &programs; const char *nameInCode; bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
}; @@ -115,7 +116,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, - info->programs + i, info->relaxedMode); + &(info->programs[i]), info->relaxedMode); } } // anonymous namespace @@ -123,7 +124,7 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) { int error; - cl_program programs[VECTOR_SIZE_COUNT]; + Programs programs; cl_kernel kernels[VECTOR_SIZE_COUNT]; float maxError = 0.0f; int ftz = f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); @@ -313,7 +314,6 @@ exit: for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++) { clReleaseKernel(kernels[k]); - clReleaseProgram(programs[k]); } return error; -- cgit v1.2.3 From b06ccc6cd96fa3e3625c40cd50445ceeef38d048 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 29 Jul 2022 18:22:18 +0100 Subject: Share BuildKernelInfo struct definition (#1453) Move the main `BuildKernelInfo` definition into `common.h` to reduce code duplication. Some tests (e.g. `i_unary_double.cpp`) use a different struct; rename those structs to `BuildKernelInfo2` for now to avoid ambiguity. 
Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 10 ---------- test_conformance/math_brute_force/binary_float.cpp | 10 ---------- test_conformance/math_brute_force/binary_i_double.cpp | 10 ---------- test_conformance/math_brute_force/binary_i_float.cpp | 10 ---------- test_conformance/math_brute_force/binary_operator_double.cpp | 12 +----------- test_conformance/math_brute_force/binary_operator_float.cpp | 12 +----------- .../math_brute_force/binary_two_results_i_double.cpp | 8 ++++---- .../math_brute_force/binary_two_results_i_float.cpp | 8 ++++---- test_conformance/math_brute_force/common.h | 10 ++++++++++ test_conformance/math_brute_force/i_unary_double.cpp | 8 ++++---- test_conformance/math_brute_force/i_unary_float.cpp | 8 ++++---- test_conformance/math_brute_force/macro_binary_double.cpp | 10 ---------- test_conformance/math_brute_force/macro_binary_float.cpp | 10 ---------- test_conformance/math_brute_force/macro_unary_double.cpp | 10 ---------- test_conformance/math_brute_force/macro_unary_float.cpp | 10 ---------- test_conformance/math_brute_force/mad_double.cpp | 8 ++++---- test_conformance/math_brute_force/mad_float.cpp | 8 ++++---- test_conformance/math_brute_force/ternary_double.cpp | 8 ++++---- test_conformance/math_brute_force/ternary_float.cpp | 8 ++++---- test_conformance/math_brute_force/unary_double.cpp | 10 ---------- test_conformance/math_brute_force/unary_float.cpp | 10 ---------- .../math_brute_force/unary_two_results_double.cpp | 8 ++++---- .../math_brute_force/unary_two_results_float.cpp | 8 ++++---- .../math_brute_force/unary_two_results_i_double.cpp | 8 ++++---- .../math_brute_force/unary_two_results_i_float.cpp | 8 ++++---- test_conformance/math_brute_force/unary_u_double.cpp | 8 ++++---- test_conformance/math_brute_force/unary_u_float.cpp | 8 ++++---- 27 files changed, 68 insertions(+), 178 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp 
b/test_conformance/math_brute_force/binary_double.cpp index 0869acad..3eb7dccc 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -112,16 +112,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index aea86ca7..db4604a3 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -110,16 +110,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index eb94b5f7..37e27ac0 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -111,16 +111,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index f6e4d2fc..539e10d0 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -109,16 +109,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index f94b8aa4..7c0766be 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -111,21 +111,11 @@ int BuildKernel(const char *operator_symbol, int vectorSize, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *operator_symbol; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; - return BuildKernel(info->operator_symbol, i, info->kernel_count, + return BuildKernel(info->nameInCode, i, info->kernel_count, info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 64a4c4af..fe2db19e 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -109,21 +109,11 @@ int BuildKernel(const char *operator_symbol, int vectorSize, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *operator_symbol; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; cl_uint i = info->offset + job_id; - return BuildKernel(info->operator_symbol, i, info->kernel_count, + return BuildKernel(info->nameInCode, i, info->kernel_count, info->kernels[i].data(), &(info->programs[i]), info->relaxedMode); } diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index 132ff593..9c98ebb7 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -118,7 +118,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -129,7 +129,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -192,8 +192,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 017ad125..354148ea 100644 --- 
a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -116,7 +116,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -127,7 +127,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -193,8 +193,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 5c1f8e6d..7c296952 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -28,4 +28,14 @@ using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; // Array of programs for each vector size. using Programs = std::array; +struct BuildKernelInfo +{ + cl_uint offset; // the first vector size to build + cl_uint kernel_count; + KernelMatrix &kernels; + Programs &programs; + const char *nameInCode; + bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
+}; + #endif /* COMMON_H */ diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index 1900afe4..f52a1292 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -103,7 +103,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -114,7 +114,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -143,8 +143,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index baf0ab67..633584a7 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -101,7 +101,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -112,7 +112,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint 
job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -140,8 +140,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index 8f723a08..624eaebb 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -110,16 +110,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index bdcb0925..04f759cf 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -108,16 +108,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 0d086614..d0786d1b 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -104,16 +104,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index ea485b05..b03a6003 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -103,16 +103,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp index 77428d06..e5ab68f6 100644 --- a/test_conformance/math_brute_force/mad_double.cpp +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -116,7 +116,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -127,7 +127,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -150,8 +150,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode 
}; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp index 9a7730f1..6760ce99 100644 --- a/test_conformance/math_brute_force/mad_float.cpp +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -114,7 +114,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -125,7 +125,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -149,8 +149,8 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index c4a9a1c4..0639b27a 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -119,7 +119,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo 
+struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -130,7 +130,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -229,8 +229,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index 36d957ce..6f19ef7a 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -117,7 +117,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -128,7 +128,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -245,8 +245,8 @@ int TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, 
relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 3351ea35..2043e5a0 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -104,16 +104,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 37a63732..b3b8056b 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -102,16 +102,6 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - KernelMatrix &kernels; - Programs &programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. 
-}; - cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index 5887f192..cf1d3e93 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -110,7 +110,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -121,7 +121,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -149,8 +149,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index fb8d5535..051aca51 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -108,7 +108,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct 
BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -119,7 +119,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -148,8 +148,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) float float_ulps = getAllowedUlpError(f, relaxedMode); // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index 6f2de049..d45ad59d 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -111,7 +111,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -122,7 +122,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -157,8 +157,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) // 
Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index 529da8dc..9efe861a 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -109,7 +109,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -120,7 +120,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -160,8 +160,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index 8113b955..e81ddada 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -105,7 
+105,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -116,7 +116,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -145,8 +145,8 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index ee077c80..bfbf2cf8 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -102,7 +102,7 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); } -struct BuildKernelInfo +struct BuildKernelInfo2 { cl_uint offset; // the first vector size to build cl_kernel *kernels; @@ -113,7 +113,7 @@ struct BuildKernelInfo cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { - BuildKernelInfo *info = (BuildKernelInfo *)p; + BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; cl_uint i = info->offset + job_id; return BuildKernel(info->nameInCode, i, info->kernels + i, &(info->programs[i]), info->relaxedMode); @@ -142,8 +142,8 @@ int 
TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, + f->nameInCode, relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) -- cgit v1.2.3 From 017f514c2139803bf2097714be9a7345476e9b2d Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Mon, 1 Aug 2022 10:18:36 +0100 Subject: Tidy up subgroup log messages (#1454) Add missing newlines and improve wording of messages. Signed-off-by: Stuart Brady --- .../subgroups/subgroup_common_templates.h | 8 +++---- test_conformance/subgroups/subhelpers.h | 28 +++++++++++----------- test_conformance/subgroups/test_queries.cpp | 8 +++---- .../subgroups/test_subgroup_ballot.cpp | 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index 5051f2e9..c1a8316c 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -321,7 +321,7 @@ template inline Ty calculate(Ty a, Ty b, ArithmeticOp operation) case ArithmeticOp::logical_and: return a && b; case ArithmeticOp::logical_or: return a || b; case ArithmeticOp::logical_xor: return !a ^ !b; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -343,7 +343,7 @@ inline cl_double calculate(cl_double a, cl_double b, ArithmeticOp operation) case ArithmeticOp::mul_: { return a * b; } - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -365,7 +365,7 @@ inline cl_float calculate(cl_float a, cl_float b, ArithmeticOp operation) case ArithmeticOp::mul_: { return a * b; } - 
default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -382,7 +382,7 @@ inline subgroups::cl_half calculate(subgroups::cl_half a, subgroups::cl_half b, case ArithmeticOp::min_: return to_float(a) < to_float(b) || is_half_nan(b.data) ? a : b; case ArithmeticOp::mul_: return to_half(to_float(a) * to_float(b)); - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return to_half(0); } diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index a305639a..cc03fc4c 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -86,7 +86,7 @@ struct WorkGroupParams if (kernel_function_name.find(name) != kernel_function_name.end()) { log_info("Kernel definition duplication. Source will be " - "overwritten for function name %s", + "overwritten for function name %s\n", name.c_str()); } kernel_function_name[name] = source; @@ -284,7 +284,7 @@ static const char *const operation_names(ArithmeticOp operation) case ArithmeticOp::logical_and: return "logical_and"; case ArithmeticOp::logical_or: return "logical_or"; case ArithmeticOp::logical_xor: return "logical_xor"; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return ""; } @@ -306,7 +306,7 @@ static const char *const operation_names(BallotOp operation) case BallotOp::gt_mask: return "gt"; case BallotOp::le_mask: return "le"; case BallotOp::lt_mask: return "lt"; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return ""; } @@ -321,7 +321,7 @@ static const char *const operation_names(ShuffleOp operation) case ShuffleOp::shuffle_xor: return "shuffle_xor"; case ShuffleOp::rotate: return "rotate"; case ShuffleOp::clustered_rotate: return "clustered_rotate"; - default: 
log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return ""; } @@ -334,7 +334,7 @@ static const char *const operation_names(NonUniformVoteOp operation) case NonUniformVoteOp::all_equal: return "all_equal"; case NonUniformVoteOp::any: return "any"; case NonUniformVoteOp::elect: return "elect"; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return ""; } @@ -347,7 +347,7 @@ static const char *const operation_names(SubgroupsBroadcastOp operation) case SubgroupsBroadcastOp::broadcast_first: return "broadcast_first"; case SubgroupsBroadcastOp::non_uniform_broadcast: return "non_uniform_broadcast"; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return ""; } @@ -524,7 +524,7 @@ template struct CommonTypeManager case ArithmeticOp::and_: return (Ty)~0; case ArithmeticOp::or_: return (Ty)0; case ArithmeticOp::xor_: return (Ty)0; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -552,7 +552,7 @@ template <> struct TypeManager : public CommonTypeManager case ArithmeticOp::logical_and: return (cl_int)1; case ArithmeticOp::logical_or: return (cl_int)0; case ArithmeticOp::logical_xor: return (cl_int)0; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -966,7 +966,7 @@ template <> struct TypeManager : public CommonTypeManager case ArithmeticOp::min_: return std::numeric_limits::infinity(); case ArithmeticOp::mul_: return (cl_float)1; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -1025,7 +1025,7 @@ template <> struct TypeManager : public CommonTypeManager case ArithmeticOp::min_: return std::numeric_limits::infinity(); case 
ArithmeticOp::mul_: return (cl_double)1; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return 0; } @@ -1112,7 +1112,7 @@ struct TypeManager case ArithmeticOp::max_: return { 0xfc00 }; case ArithmeticOp::min_: return { 0x7c00 }; case ArithmeticOp::mul_: return { 0x3c00 }; - default: log_error("Unknown operation request"); break; + default: log_error("Unknown operation request\n"); break; } return { 0 }; } @@ -1566,7 +1566,7 @@ template struct test subgroupsApiSet.clGetKernelSubGroupInfo_ptr(); if (clGetKernelSubGroupInfo_ptr == NULL) { - log_error("ERROR: %s function not available", + log_error("ERROR: %s function not available\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } @@ -1576,7 +1576,7 @@ template struct test if (error != CL_SUCCESS) { log_error("ERROR: %s function error for " - "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE", + "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } @@ -1589,7 +1589,7 @@ template struct test if (error != CL_SUCCESS) { log_error("ERROR: %s function error for " - "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE", + "CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } diff --git a/test_conformance/subgroups/test_queries.cpp b/test_conformance/subgroups/test_queries.cpp index 761ca7a6..6b940935 100644 --- a/test_conformance/subgroups/test_queries.cpp +++ b/test_conformance/subgroups/test_queries.cpp @@ -100,7 +100,7 @@ int test_sub_group_info(cl_device_id device, cl_context context, subgroupsApiSet.clGetKernelSubGroupInfo_ptr(); if (clGetKernelSubGroupInfo_ptr == NULL) { - log_error("ERROR: %s function not available", + log_error("ERROR: %s function not available\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } @@ -112,7 +112,7 @@ int test_sub_group_info(cl_device_id device, cl_context context, if (error != 
CL_SUCCESS) { log_error("ERROR: %s function error for " - "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE", + "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } @@ -133,7 +133,7 @@ int test_sub_group_info(cl_device_id device, cl_context context, if (error != CL_SUCCESS) { log_error("ERROR: %s function error " - "for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE", + "for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE\n", subgroupsApiSet.clGetKernelSubGroupInfo_name); return TEST_FAIL; } @@ -209,4 +209,4 @@ int test_sub_group_info_ext(cl_device_id device, cl_context context, } return test_sub_group_info(device, context, queue, num_elements, false); -} \ No newline at end of file +} diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index b1e6944f..b35520e6 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -502,7 +502,7 @@ template struct BALLOT_COUNT_SCAN_FIND } else { - log_error("Unknown operation..."); + log_error("Unknown operation...\n"); } } -- cgit v1.2.3 From bd03e17a56c86116d6254bd56ead2fa84710f919 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Mon, 1 Aug 2022 10:18:53 +0100 Subject: Add missing external memory/sync extensions to list of known khr extensions (#1455) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Kévin Petit --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 84b7798f..4e5b2841 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -77,11 +77,14 @@ const char 
*known_extensions[] = { "cl_khr_spirv_linkonce_odr", "cl_khr_semaphore", "cl_khr_external_semaphore", + "cl_khr_external_semaphore_opaque_fd", "cl_khr_external_semaphore_sync_fd", "cl_khr_command_buffer", + "cl_khr_external_memory", + "cl_khr_external_memory_opaque_fd", }; -size_t num_known_extensions = sizeof(known_extensions) / sizeof(char *); +size_t num_known_extensions = ARRAY_SIZE(known_extensions); size_t first_API_extension = 29; const char *known_embedded_extensions[] = { -- cgit v1.2.3 From cdf5a105fcdc32b9203c94e0623e36f64117e6a8 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 2 Aug 2022 18:16:03 +0100 Subject: Fix misleading indentation and enable -Wmisleading-indentation (#1458) Signed-off-by: Stuart Brady --- CMakeLists.txt | 1 + test_conformance/basic/test_arraycopy.cpp | 5 ++-- .../basic/test_multireadimageonefmt.cpp | 24 +++++++++---------- test_conformance/basic/test_writeimage_fp32.cpp | 7 +++--- test_conformance/basic/test_writeimage_int16.cpp | 7 +++--- test_conformance/commonfns/test_sign.cpp | 13 +++++------ test_conformance/commonfns/test_step.cpp | 27 ++++++++++------------ test_conformance/events/test_callbacks.cpp | 25 ++++++++++---------- test_conformance/half/Test_vStoreHalf.cpp | 6 ++--- .../images/clReadWriteImage/test_read_1D.cpp | 19 ++++++++------- .../test_multiple_devices.cpp | 5 ++-- test_conformance/profiling/execute.cpp | 4 ++-- test_conformance/profiling/writeImage.cpp | 4 ++-- 13 files changed, 72 insertions(+), 75 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f5f4472..fe56d0fa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -88,6 +88,7 @@ macro(add_cxx_flag_if_supported flag) endmacro(add_cxx_flag_if_supported) if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") + add_cxx_flag_if_supported(-Wmisleading-indentation) add_cxx_flag_if_supported(-Wno-narrowing) add_cxx_flag_if_supported(-Wno-format) add_cxx_flag_if_supported(-Werror) diff --git 
a/test_conformance/basic/test_arraycopy.cpp b/test_conformance/basic/test_arraycopy.cpp index 5a352869..d9dbcc1b 100644 --- a/test_conformance/basic/test_arraycopy.cpp +++ b/test_conformance/basic/test_arraycopy.cpp @@ -181,9 +181,8 @@ test_arraycopy(cl_device_id device, cl_context context, cl_command_queue queue, } } - // Keep track of multiple errors. - if (error_count != 0) - err = error_count; + // Keep track of multiple errors. + if (error_count != 0) err = error_count; if (err) log_error("\tCL_MEM_USE_HOST_PTR buffer with kernel copy FAILED\n"); diff --git a/test_conformance/basic/test_multireadimageonefmt.cpp b/test_conformance/basic/test_multireadimageonefmt.cpp index b37c8414..c230e67a 100644 --- a/test_conformance/basic/test_multireadimageonefmt.cpp +++ b/test_conformance/basic/test_multireadimageonefmt.cpp @@ -153,14 +153,14 @@ int test_mri_one(cl_device_id device, cl_context context, cl_command_queue queue err = clSetKernelArg(kernel, 0, sizeof i, &i); err |= clSetKernelArg(kernel, 1, sizeof err, &err); err |= clSetKernelArg(kernel, 2, sizeof sampler, &sampler); - for (i=0; i<8; i++) - err |= clSetKernelArg(kernel, 3+i, sizeof streams[i], &streams[i]); + for (i = 0; i < 8; i++) + err |= clSetKernelArg(kernel, 3 + i, sizeof streams[i], &streams[i]); - if (err != CL_SUCCESS) - { - log_error("clSetKernelArgs failed\n"); - return -1; - } + if (err != CL_SUCCESS) + { + log_error("clSetKernelArgs failed\n"); + return -1; + } threads[0] = (unsigned int)img_width; threads[1] = (unsigned int)img_height; @@ -182,15 +182,13 @@ int test_mri_one(cl_device_id device, cl_context context, cl_command_queue queue // cleanup clReleaseSampler(sampler); - for (i=0; i<8; i++) - clReleaseMemObject(streams[i]); + for (i = 0; i < 8; i++) clReleaseMemObject(streams[i]); clReleaseKernel(kernel); clReleaseProgram(program); - for (i=0; i<7; i++) - free(input_ptr[i]); - free(output_ptr); + for (i = 0; i < 7; i++) free(input_ptr[i]); + free(output_ptr); - return err; + return err; } 
diff --git a/test_conformance/basic/test_writeimage_fp32.cpp b/test_conformance/basic/test_writeimage_fp32.cpp index fef71874..c68463ac 100644 --- a/test_conformance/basic/test_writeimage_fp32.cpp +++ b/test_conformance/basic/test_writeimage_fp32.cpp @@ -122,9 +122,10 @@ int test_writeimage_fp32(cl_device_id device, cl_context context, cl_command_que return -1; } - err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgbaFFFF_write_kernel_code, "test_rgbaFFFF_write" ); - if (err) - return -1; + err = create_single_kernel_helper(context, &program, &kernel[0], 1, + &rgbaFFFF_write_kernel_code, + "test_rgbaFFFF_write"); + if (err) return -1; kernel[1] = clCreateKernel(program, "test_rgbaFFFF_write", NULL); if (!kernel[1]) { diff --git a/test_conformance/basic/test_writeimage_int16.cpp b/test_conformance/basic/test_writeimage_int16.cpp index 8afb77a9..d863a3a3 100644 --- a/test_conformance/basic/test_writeimage_int16.cpp +++ b/test_conformance/basic/test_writeimage_int16.cpp @@ -128,9 +128,10 @@ int test_writeimage_int16(cl_device_id device, cl_context context, cl_command_qu return -1; } - err = create_single_kernel_helper(context, &program, &kernel[0], 1, &rgba16_write_kernel_code, "test_rgba16_write" ); - if (err) - return -1; + err = create_single_kernel_helper(context, &program, &kernel[0], 1, + &rgba16_write_kernel_code, + "test_rgba16_write"); + if (err) return -1; kernel[1] = clCreateKernel(program, "test_rgba16_write", NULL); if (!kernel[1]) { diff --git a/test_conformance/commonfns/test_sign.cpp b/test_conformance/commonfns/test_sign.cpp index 1b842e35..6dba58da 100644 --- a/test_conformance/commonfns/test_sign.cpp +++ b/test_conformance/commonfns/test_sign.cpp @@ -223,14 +223,13 @@ test_sign(cl_device_id device, cl_context context, cl_command_queue queue, int n free(input_ptr[0]); free(output_ptr); - if(err) - return err; + if (err) return err; - if( ! 
is_extension_available( device, "cl_khr_fp64")) - { - log_info( "skipping double test -- cl_khr_fp64 not supported.\n" ); - return 0; - } + if (!is_extension_available(device, "cl_khr_fp64")) + { + log_info("skipping double test -- cl_khr_fp64 not supported.\n"); + return 0; + } return test_sign_double( device, context, queue, n_elems); } diff --git a/test_conformance/commonfns/test_step.cpp b/test_conformance/commonfns/test_step.cpp index 0e3cfe07..330083b2 100644 --- a/test_conformance/commonfns/test_step.cpp +++ b/test_conformance/commonfns/test_step.cpp @@ -158,23 +158,20 @@ test_step(cl_device_id device, cl_context context, cl_command_queue queue, int n } err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &step_kernel_code, "test_step" ); - if (err) - return -1; + if (err) return -1; err = create_single_kernel_helper( context, &program[1], &kernel[1], 1, &step2_kernel_code, "test_step2" ); - if (err) - return -1; + if (err) return -1; err = create_single_kernel_helper( context, &program[2], &kernel[2], 1, &step4_kernel_code, "test_step4" ); - if (err) - return -1; - err = create_single_kernel_helper( context, &program[3], &kernel[3], 1, &step8_kernel_code, "test_step8" ); - if (err) - return -1; - err = create_single_kernel_helper( context, &program[4], &kernel[4], 1, &step16_kernel_code, "test_step16" ); - if (err) - return -1; - err = create_single_kernel_helper( context, &program[5], &kernel[5], 1, &step3_kernel_code, "test_step3" ); - if (err) - return -1; + if (err) return -1; + err = create_single_kernel_helper(context, &program[3], &kernel[3], 1, + &step8_kernel_code, "test_step8"); + if (err) return -1; + err = create_single_kernel_helper(context, &program[4], &kernel[4], 1, + &step16_kernel_code, "test_step16"); + if (err) return -1; + err = create_single_kernel_helper(context, &program[5], &kernel[5], 1, + &step3_kernel_code, "test_step3"); + if (err) return -1; values[0] = streams[0]; values[1] = streams[1]; diff --git 
a/test_conformance/events/test_callbacks.cpp b/test_conformance/events/test_callbacks.cpp index 2ffb9ca7..6025afb7 100644 --- a/test_conformance/events/test_callbacks.cpp +++ b/test_conformance/events/test_callbacks.cpp @@ -110,11 +110,12 @@ int test_callback_event_single( cl_device_id device, cl_context context, cl_comm { usleep( 100000 ); // 1/10th second - int cc=0; - for( int k=0;k< EVENT_CALLBACK_TYPE_TOTAL;k++) - if (sCallbackTriggered_flag[k]) { - cc++; - } + int cc = 0; + for (int k = 0; k < EVENT_CALLBACK_TYPE_TOTAL; k++) + if (sCallbackTriggered_flag[k]) + { + cc++; + } if (cc== EVENT_CALLBACK_TYPE_TOTAL ) { @@ -260,8 +261,8 @@ int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_c if (actionEvents == NULL) { log_error(" memory error in test_callbacks_simultaneous \n"); - for (size_t i=0;i<(sizeof(actions)/sizeof(actions[0]));++i) - if (actions[i]) delete actions[i]; + for (size_t i = 0; i < (sizeof(actions) / sizeof(actions[0])); ++i) + if (actions[i]) delete actions[i]; return -1; } @@ -317,11 +318,11 @@ int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_c usleep( 100000 ); // 1/10th second if( ((last_count = sSimultaneousCount)) == total_callbacks ) { - // All of the callbacks were executed - if (actionEvents) delete [] actionEvents; - for (size_t i=0;i<(sizeof(actions)/sizeof(actions[0]));++i) - if (actions[i]) delete actions[i]; - return 0; + // All of the callbacks were executed + if (actionEvents) delete[] actionEvents; + for (size_t i = 0; i < (sizeof(actions) / sizeof(actions[0])); ++i) + if (actions[i]) delete actions[i]; + return 0; } } diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index 3ca5920b..b1491025 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -117,8 +117,7 @@ CheckF(cl_uint jid, cl_uint tid, void *userInfo) return 0; for (j = 0; j < count; j++) { - if (s[j] == r[j]) - 
continue; + if (s[j] == r[j]) continue; // Pass any NaNs if ((s[j] & 0x7fff) > 0x7c00 && (r[j] & 0x7fff) > 0x7c00 ) @@ -189,8 +188,7 @@ CheckD(cl_uint jid, cl_uint tid, void *userInfo) return 0; for (j = 0; j < count; j++) { - if (s[j] == r[j]) - continue; + if (s[j] == r[j]) continue; // Pass any NaNs if ((s[j] & 0x7fff) > 0x7c00 && (r[j] & 0x7fff) > 0x7c00) diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp index eef5bf4e..2a42a70e 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp @@ -90,14 +90,17 @@ int test_read_image_1D(cl_context context, cl_command_queue queue, region[0] = width_lod; - if ( gDebugTrace ) - if ( gTestMipmaps) { - log_info(" - Working at mipLevel :%llu\n", (unsigned long long)lod); - } - error = clEnqueueWriteImage(queue, image, CL_FALSE, - origin, region, ( gEnablePitch ? row_pitch_lod : 0 ), 0, - (char*)imageValues + imgValMipLevelOffset, 0, NULL, NULL); - if (error != CL_SUCCESS) { + if (gDebugTrace) + if (gTestMipmaps) + { + log_info(" - Working at mipLevel :%llu\n", (unsigned long long)lod); + } + error = clEnqueueWriteImage(queue, image, CL_FALSE, origin, region, + (gEnablePitch ? row_pitch_lod : 0), 0, + (char *)imageValues + imgValMipLevelOffset, 0, + NULL, NULL); + if (error != CL_SUCCESS) + { log_error( "ERROR: Unable to write to 1D image of size %d \n", (int)width_lod ); return -1; } diff --git a/test_conformance/multiple_device_context/test_multiple_devices.cpp b/test_conformance/multiple_device_context/test_multiple_devices.cpp index 59543ade..4f187b9c 100644 --- a/test_conformance/multiple_device_context/test_multiple_devices.cpp +++ b/test_conformance/multiple_device_context/test_multiple_devices.cpp @@ -175,9 +175,8 @@ int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices } /* All done now! 
*/ - if (errors) - return -1; - return 0; + if (errors) return -1; + return 0; } int test_two_devices(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) diff --git a/test_conformance/profiling/execute.cpp b/test_conformance/profiling/execute.cpp index 0541bfa5..44b1bcd4 100644 --- a/test_conformance/profiling/execute.cpp +++ b/test_conformance/profiling/execute.cpp @@ -335,8 +335,8 @@ static int kernelFilter( cl_device_id device, cl_context context, cl_command_que clReleaseMemObject( memobjs[1] ); clReleaseMemObject( memobjs[0] ); - if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) - err = -1; + if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) + err = -1; return err; diff --git a/test_conformance/profiling/writeImage.cpp b/test_conformance/profiling/writeImage.cpp index fbc8fbcd..ec2fbdaf 100644 --- a/test_conformance/profiling/writeImage.cpp +++ b/test_conformance/profiling/writeImage.cpp @@ -628,8 +628,8 @@ int write_image( cl_device_id device, cl_context context, cl_command_queue queue free( dst ); free( inptr ); - if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) - err = -1; + if (check_times(queueStart, submitStart, writeStart, writeEnd, device)) + err = -1; return err; -- cgit v1.2.3 From c44b5b6ae3c1d762d73c5e16eb51e244f9d708a2 Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Tue, 2 Aug 2022 14:31:24 -0500 Subject: Fix indentation of test_waitlists.cpp (#1459) * fix indentation of test_waitlists.cpp Followup of #1458 * run formatter --- test_conformance/events/test_waitlists.cpp | 171 +++++++++++++++++------------ 1 file changed, 101 insertions(+), 70 deletions(-) diff --git a/test_conformance/events/test_waitlists.cpp b/test_conformance/events/test_waitlists.cpp index e23cacf4..ebf5da9b 100644 --- a/test_conformance/events/test_waitlists.cpp +++ b/test_conformance/events/test_waitlists.cpp @@ -28,10 +28,13 @@ int test_waitlist( cl_device_id device, 
cl_context context, cl_command_queue que cl_int status[ 3 ]; cl_int error; - if (multiple) - log_info("\tExecuting reference event 0, then reference event 1 with reference event 0 in its waitlist, then test event 2 with reference events 0 and 1 in its waitlist.\n"); - else - log_info("\tExecuting reference event 0, then test event 2 with reference event 0 in its waitlist.\n"); + if (multiple) + log_info("\tExecuting reference event 0, then reference event 1 with " + "reference event 0 in its waitlist, then test event 2 with " + "reference events 0 and 1 in its waitlist.\n"); + else + log_info("\tExecuting reference event 0, then test event 2 with " + "reference event 0 in its waitlist.\n"); // Set up the first base action to wait against error = actions[ 0 ].Setup( device, context, queue ); @@ -49,7 +52,7 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que test_error( error, "Unable to set up test event" ); // Execute all events now - if (PRINT_OPS) log_info("\tExecuting action 0...\n"); + if (PRINT_OPS) log_info("\tExecuting action 0...\n"); error = actions[ 0 ].Execute( queue, 0, NULL, &events[ 0 ] ); test_error( error, "Unable to execute first event" ); @@ -61,17 +64,20 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que } // Sanity check - if( multiple ) { - if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + if (multiple) + { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); - test_error( error, "Unable to get event status" ); - } - if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + test_error(error, "Unable to get event status"); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); - test_error( error, "Unable 
to get event status" ); + test_error(error, "Unable to get event status"); - log_info("\t\tEvent status after starting reference events: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", - IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), "N/A"); + log_info("\t\tEvent status after starting reference events: reference " + "event 0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString(status[0]), + (multiple ? IGetStatusString(status[1]) : "N/A"), "N/A"); if( ( status[ 0 ] == CL_COMPLETE ) || ( multiple && status[ 1 ] == CL_COMPLETE ) ) { @@ -79,25 +85,29 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que return 0; } - if (PRINT_OPS) log_info("\tExecuting action to test...\n"); + if (PRINT_OPS) log_info("\tExecuting action to test...\n"); error = actionToTest->Execute( queue, ( multiple ) ? 2 : 1, &events[ 0 ], &events[ 2 ] ); test_error( error, "Unable to execute test event" ); // Hopefully, the first event is still running - if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); + if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); test_error( error, "Unable to get event status" ); - if( multiple ) { - if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + if (multiple) + { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); - test_error( error, "Unable to get event status" ); - } - if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + test_error(error, "Unable to get event status"); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); 
- test_error( error, "Unable to get event status" ); + test_error(error, "Unable to get event status"); - log_info("\t\tEvent status after starting test event: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", - IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + log_info("\t\tEvent status after starting test event: reference event 0: " + "%s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString(status[0]), + (multiple ? IGetStatusString(status[1]) : "N/A"), + IGetStatusString(status[2])); if( multiple ) { @@ -108,12 +118,15 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que return 0; } - if(status[1] == CL_COMPLETE && status[0] != CL_COMPLETE) - { - log_error("ERROR: Test failed because the second wait event is complete and the first is not.(status: 0: %s and 1: %s)\n", IGetStatusString( status[ 0 ] ), IGetStatusString( status[ 1 ] ) ); + if (status[1] == CL_COMPLETE && status[0] != CL_COMPLETE) + { + log_error( + "ERROR: Test failed because the second wait event is complete " + "and the first is not.(status: 0: %s and 1: %s)\n", + IGetStatusString(status[0]), IGetStatusString(status[1])); clFinish( queue ); return -1; - } + } } else { @@ -139,25 +152,29 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que } // Now wait for the first reference event - if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); + if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); error = clWaitForEvents( 1, &events[ 0 ] ); test_error( error, "Unable to wait for reference event" ); // Grab statuses again - if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); + if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); test_error( error, "Unable to get event 
status" ); - if( multiple ) { - if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + if (multiple) + { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); - test_error( error, "Unable to get event status" ); - } - if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + test_error(error, "Unable to get event status"); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); - test_error( error, "Unable to get event status" ); + test_error(error, "Unable to get event status"); - log_info("\t\tEvent status after waiting for reference event 0: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", - IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + log_info("\t\tEvent status after waiting for reference event 0: reference " + "event 0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString(status[0]), + (multiple ? 
IGetStatusString(status[1]) : "N/A"), + IGetStatusString(status[2])); // Sanity if( status[ 0 ] != CL_COMPLETE ) @@ -170,11 +187,12 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que // If we're multiple, and the second event isn't complete, then our test event should still be queued if( multiple && status[ 1 ] != CL_COMPLETE ) { - if( status[ 1 ] == CL_RUNNING && status[ 2 ] == CL_RUNNING ) { - log_error("ERROR: Test event and second event are both running.\n"); - clFinish( queue ); - return -1; - } + if (status[1] == CL_RUNNING && status[2] == CL_RUNNING) + { + log_error("ERROR: Test event and second event are both running.\n"); + clFinish(queue); + return -1; + } if( status[ 2 ] != CL_QUEUED && status[ 2 ] != CL_SUBMITTED ) { log_error( "ERROR: Test event did not wait for second event before starting! (status of ref: 1: %s, of test: 2: %s)\n", IGetStatusString( status[ 1 ] ), IGetStatusString( status[ 2 ] ) ); @@ -183,25 +201,33 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que } // Now wait for second event to complete, too - if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); + if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); error = clWaitForEvents( 1, &events[ 1 ] ); test_error( error, "Unable to wait for second reference event" ); // Grab statuses again - if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); - error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); - test_error( error, "Unable to get event status" ); - if( multiple ) { - if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); - error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); - test_error( error, "Unable to get event status" ); - } - if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); - error = clGetEventInfo( events[ 0 ], 
CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); - test_error( error, "Unable to get event status" ); - - log_info("\t\tEvent status after waiting for reference event 1: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", - IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); + error = clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[2]), &status[2], NULL); + test_error(error, "Unable to get event status"); + if (multiple) + { + if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[1]), &status[1], NULL); + test_error(error, "Unable to get event status"); + } + if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[0]), &status[0], NULL); + test_error(error, "Unable to get event status"); + + log_info( + "\t\tEvent status after waiting for reference event 1: reference " + "event 0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString(status[0]), + (multiple ? 
IGetStatusString(status[1]) : "N/A"), + IGetStatusString(status[2])); // Sanity if( status[ 1 ] != CL_COMPLETE ) @@ -227,25 +253,30 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que } // Wait for the test event, then return - if (PRINT_OPS) log_info("\tWaiting for action 2 to test to finish...\n"); + if (PRINT_OPS) log_info("\tWaiting for action 2 to test to finish...\n"); error = clWaitForEvents( 1, &events[ 2 ] ); test_error( error, "Unable to wait for test event" ); - error |= clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); - test_error( error, "Unable to get event status" ); + error |= clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[2]), &status[2], NULL); + test_error(error, "Unable to get event status"); - log_info("\t\tEvent status after waiting for test event: reference event 0: %s, reference event 1: %s, test event 2: %s.\n", - IGetStatusString( status[ 0 ] ), (multiple ? IGetStatusString( status[ 1 ] ) : "N/A"), IGetStatusString( status[ 2 ] )); + log_info("\t\tEvent status after waiting for test event: reference event " + "0: %s, reference event 1: %s, test event 2: %s.\n", + IGetStatusString(status[0]), + (multiple ? 
IGetStatusString(status[1]) : "N/A"), + IGetStatusString(status[2])); - // Sanity - if( status[ 2 ] != CL_COMPLETE ) - { - log_error( "ERROR: Test event didn't complete (status: 2: %s)\n", IGetStatusString( status[ 2 ] ) ); - clFinish( queue ); - return -1; - } + // Sanity + if (status[2] != CL_COMPLETE) + { + log_error("ERROR: Test event didn't complete (status: 2: %s)\n", + IGetStatusString(status[2])); + clFinish(queue); + return -1; + } - clFinish(queue); + clFinish(queue); return 0; } -- cgit v1.2.3 From c12bff46c605b9326908c9aaf4e50a5e6e81d166 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 4 Aug 2022 15:03:52 +0100 Subject: Tidy up BuildKernelInfo (#1461) Remove the `offset` field from both structures, because it was always set to the global `gMinVectorSizeIndex`. Improve documentation and rename some variables: - `i` becomes `vectorSize`; - `kernel_count` becomes `threadCount`. Original patch by Marco Antognini. Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 15 +++++++-------- test_conformance/math_brute_force/binary_float.cpp | 15 +++++++-------- test_conformance/math_brute_force/binary_i_double.cpp | 15 +++++++-------- test_conformance/math_brute_force/binary_i_float.cpp | 15 +++++++-------- .../math_brute_force/binary_operator_double.cpp | 15 +++++++-------- .../math_brute_force/binary_operator_float.cpp | 15 +++++++-------- .../math_brute_force/binary_two_results_i_double.cpp | 11 +++++------ .../math_brute_force/binary_two_results_i_float.cpp | 11 +++++------ test_conformance/math_brute_force/common.h | 13 ++++++++++--- test_conformance/math_brute_force/i_unary_double.cpp | 11 +++++------ test_conformance/math_brute_force/i_unary_float.cpp | 11 +++++------ test_conformance/math_brute_force/macro_binary_double.cpp | 15 +++++++-------- test_conformance/math_brute_force/macro_binary_float.cpp | 15 +++++++-------- 
test_conformance/math_brute_force/macro_unary_double.cpp | 15 +++++++-------- test_conformance/math_brute_force/macro_unary_float.cpp | 15 +++++++-------- test_conformance/math_brute_force/mad_double.cpp | 11 +++++------ test_conformance/math_brute_force/mad_float.cpp | 11 +++++------ test_conformance/math_brute_force/ternary_double.cpp | 11 +++++------ test_conformance/math_brute_force/ternary_float.cpp | 11 +++++------ test_conformance/math_brute_force/unary_double.cpp | 15 +++++++-------- test_conformance/math_brute_force/unary_float.cpp | 15 +++++++-------- .../math_brute_force/unary_two_results_double.cpp | 11 +++++------ .../math_brute_force/unary_two_results_float.cpp | 11 +++++------ .../math_brute_force/unary_two_results_i_double.cpp | 11 +++++------ .../math_brute_force/unary_two_results_i_float.cpp | 11 +++++------ test_conformance/math_brute_force/unary_u_double.cpp | 11 +++++------ test_conformance/math_brute_force/unary_u_float.cpp | 11 +++++------ 27 files changed, 164 insertions(+), 183 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 3eb7dccc..034b325a 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -115,10 +115,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -741,10 +741,9 @@ int 
TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index db4604a3..7abaa0e4 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -113,10 +113,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -898,10 +898,9 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 37e27ac0..bba93617 
100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -114,10 +114,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -663,10 +663,9 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 539e10d0..4821830c 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -112,10 +112,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, 
vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -656,10 +656,9 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 7c0766be..09c560e9 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -114,10 +114,10 @@ int BuildKernel(const char *operator_symbol, int vectorSize, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -708,10 +708,9 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, 
gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index fe2db19e..f2e57bc1 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -112,10 +112,10 @@ int BuildKernel(const char *operator_symbol, int vectorSize, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -835,10 +835,9 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index 9c98ebb7..59a5bfe2 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -120,7 +120,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs 
&programs; const char *nameInCode; @@ -130,9 +129,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } struct ComputeReferenceInfoD @@ -192,8 +191,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 354148ea..6c1dd3bc 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -118,7 +118,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -128,9 +127,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), 
info->relaxedMode); } struct ComputeReferenceInfoF @@ -193,8 +192,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 7c296952..f0d18dd9 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -28,14 +28,21 @@ using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; // Array of programs for each vector size. using Programs = std::array; +// Information to generate OpenCL kernels. struct BuildKernelInfo { - cl_uint offset; // the first vector size to build - cl_uint kernel_count; + // Number of kernels to build, one for each thread to avoid data races. + cl_uint threadCount; + KernelMatrix &kernels; + Programs &programs; + + // Function, macro or symbol tested by the kernel. const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. + + // Whether to build with -cl-fast-relaxed-math. 
+ bool relaxedMode; }; #endif /* COMMON_H */ diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index f52a1292..a05737da 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -105,7 +105,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -115,9 +114,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -143,8 +142,8 @@ int TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index 633584a7..13442e61 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -103,7 +103,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -113,9 +112,9 
@@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -140,8 +139,8 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index 624eaebb..88b0f86c 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -113,10 +113,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -666,10 +666,9 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - 
gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 04f759cf..6199dd1a 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -111,10 +111,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -655,10 +655,9 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index d0786d1b..b7fb8a96 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ 
b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -107,10 +107,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -439,10 +439,9 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index b03a6003..e4c22369 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -106,10 +106,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + 
info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -453,10 +453,9 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp index e5ab68f6..3def6a80 100644 --- a/test_conformance/math_brute_force/mad_double.cpp +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -118,7 +118,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -128,9 +127,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -150,8 +149,8 @@ int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - 
gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp index 6760ce99..498f25eb 100644 --- a/test_conformance/math_brute_force/mad_float.cpp +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -116,7 +116,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -126,9 +125,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -149,8 +148,8 @@ int TestFunc_mad_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index 0639b27a..94fbe268 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -121,7 +121,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -131,9 +130,9 @@ struct BuildKernelInfo2 
cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } // A table of more difficult cases to get right @@ -229,8 +228,8 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index 6f19ef7a..762c57de 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -119,7 +119,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -129,9 +128,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } // A table of more difficult cases to get right @@ -245,8 +244,8 @@ int 
TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 2043e5a0..76bcfd46 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -107,10 +107,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -465,10 +465,9 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index b3b8056b..d310054d 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ 
b/test_conformance/math_brute_force/unary_float.cpp @@ -105,10 +105,10 @@ int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i].data(), &(info->programs[i]), - info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread @@ -636,10 +636,9 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index cf1d3e93..858b2c35 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -112,7 +112,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -122,9 +121,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - 
&(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -149,8 +148,8 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index 051aca51..85e5d014 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp @@ -110,7 +110,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -120,9 +119,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -148,8 +147,8 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) float float_ulps = getAllowedUlpError(f, relaxedMode); // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + 
BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index d45ad59d..4cfbca9c 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -113,7 +113,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -123,9 +122,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } cl_ulong abs_cl_long(cl_long i) @@ -157,8 +156,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index 9efe861a..e324ad09 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -111,7 +111,6 
@@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -121,9 +120,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } cl_ulong abs_cl_long(cl_long i) @@ -160,8 +159,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index e81ddada..a0c6b793 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -107,7 +107,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -117,9 +116,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = 
gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } cl_ulong random64(MTdata d) @@ -145,8 +144,8 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - gMinVectorSizeIndex, &build_info))) diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index bfbf2cf8..ccfbc3be 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -104,7 +104,6 @@ int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, struct BuildKernelInfo2 { - cl_uint offset; // the first vector size to build cl_kernel *kernels; Programs &programs; const char *nameInCode; @@ -114,9 +113,9 @@ struct BuildKernelInfo2 cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo2 *info = (BuildKernelInfo2 *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernels + i, - &(info->programs[i]), info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->kernels + vectorSize, + &(info->programs[vectorSize]), info->relaxedMode); } } // anonymous namespace @@ -142,8 +141,8 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) // Init the kernels { - BuildKernelInfo2 build_info = { gMinVectorSizeIndex, kernels, programs, - f->nameInCode, relaxedMode }; + BuildKernelInfo2 build_info{ kernels, programs, f->nameInCode, + relaxedMode }; if ((error = ThreadPool_Do(BuildKernelFn, gMaxVectorSizeIndex - 
gMinVectorSizeIndex, &build_info))) -- cgit v1.2.3 From 4ee8022230f2cde0cc59a327f85dc31ccb34f778 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Thu, 4 Aug 2022 15:04:14 +0100 Subject: Remove unused variables in subgroup tests (#1460) Signed-off-by: Stuart Brady --- test_conformance/subgroups/subgroup_common_templates.h | 2 +- test_conformance/subgroups/subhelpers.h | 1 - test_conformance/subgroups/test_barrier.cpp | 1 - test_conformance/subgroups/test_subgroup.cpp | 17 ----------------- test_conformance/subgroups/test_subgroup_ballot.cpp | 15 +++------------ .../subgroups/test_subgroup_clustered_reduce.cpp | 1 - 6 files changed, 4 insertions(+), 33 deletions(-) diff --git a/test_conformance/subgroups/subgroup_common_templates.h b/test_conformance/subgroups/subgroup_common_templates.h index c1a8316c..b2648c30 100644 --- a/test_conformance/subgroups/subgroup_common_templates.h +++ b/test_conformance/subgroups/subgroup_common_templates.h @@ -481,7 +481,7 @@ template struct SHF static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - int i, ii, j, k, n, delta; + int i, ii, j, k, n; cl_uint l; int nw = test_params.local_workgroup_size; int ns = test_params.subgroup_size; diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index cc03fc4c..0944ffb3 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -1496,7 +1496,6 @@ template struct test size_t tmp; cl_int error; int subgroup_size, num_subgroups; - size_t realSize; size_t global = test_params.global_workgroup_size; size_t local = test_params.local_workgroup_size; clProgramWrapper program; diff --git a/test_conformance/subgroups/test_barrier.cpp b/test_conformance/subgroups/test_barrier.cpp index d415eefb..fb93ddb1 100644 --- a/test_conformance/subgroups/test_barrier.cpp +++ b/test_conformance/subgroups/test_barrier.cpp @@ -79,7 +79,6 @@ template struct BAR int ng = test_params.global_workgroup_size; int 
nj = (nw + ns - 1) / ns; ng = ng / nw; - int e; ii = 0; for (k = 0; k < ng; ++k) diff --git a/test_conformance/subgroups/test_subgroup.cpp b/test_conformance/subgroups/test_subgroup.cpp index aa9b32cb..75e9d4ae 100644 --- a/test_conformance/subgroups/test_subgroup.cpp +++ b/test_conformance/subgroups/test_subgroup.cpp @@ -134,23 +134,6 @@ template struct AA } }; -static const char *any_source = "__kernel void test_any(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_any(in[gid]);\n" - "}\n"; - -static const char *all_source = "__kernel void test_all(const __global Type " - "*in, __global int4 *xy, __global Type *out)\n" - "{\n" - " int gid = get_global_id(0);\n" - " XY(xy,gid);\n" - " out[gid] = sub_group_all(in[gid]);\n" - "}\n"; - - template int run_broadcast_scan_reduction_for_type(RunTestForType rft) { diff --git a/test_conformance/subgroups/test_subgroup_ballot.cpp b/test_conformance/subgroups/test_subgroup_ballot.cpp index b35520e6..3882311d 100644 --- a/test_conformance/subgroups/test_subgroup_ballot.cpp +++ b/test_conformance/subgroups/test_subgroup_ballot.cpp @@ -190,14 +190,13 @@ template struct BALLOT_BIT_EXTRACT static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - int wi_id, sb_id, wg_id, l; + int wi_id, sb_id, wg_id; int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; int sb_number = (lws + sbs - 1) / sbs; int wg_number = gws / lws; int limit_sbs = sbs > 100 ? 
100 : sbs; - int non_uniform_size = gws % lws; for (wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group @@ -235,7 +234,7 @@ template struct BALLOT_BIT_EXTRACT static test_status chk(Ty *x, Ty *y, Ty *mx, Ty *my, cl_int *m, const WorkGroupParams &test_params) { - int wi_id, wg_id, l, sb_id; + int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; @@ -351,10 +350,6 @@ template struct BALLOT_INVERSE static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - int gws = test_params.global_workgroup_size; - int lws = test_params.local_workgroup_size; - int sbs = test_params.subgroup_size; - int non_uniform_size = gws % lws; // no work here } @@ -398,9 +393,6 @@ template struct BALLOT_INVERSE { current_sbs = wg_offset + sbs > lws ? lws - wg_offset : sbs; } - // take index of array where info which work_item will - // be broadcast its value is stored - int midx = 4 * wg_offset + 2; // take subgroup local id of this work_item // Check result for (wi_id = 0; wi_id < current_sbs; ++wi_id) @@ -461,7 +453,6 @@ template struct BALLOT_COUNT_SCAN_FIND { wg_number++; } - int e; for (wg_id = 0; wg_id < wg_number; ++wg_id) { // for each work_group if (non_uniform_size && wg_id == wg_number - 1) @@ -683,7 +674,7 @@ template struct SMASK static void gen(Ty *x, Ty *t, cl_int *m, const WorkGroupParams &test_params) { - int wi_id, wg_id, l, sb_id; + int wi_id, wg_id, sb_id; int gws = test_params.global_workgroup_size; int lws = test_params.local_workgroup_size; int sbs = test_params.subgroup_size; diff --git a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp index b016bf99..38652d51 100644 --- a/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp +++ b/test_conformance/subgroups/test_subgroup_clustered_reduce.cpp @@ -102,7 +102,6 @@ template struct RED_CLU { int ii = j * ns; 
int n = ii + ns > nw ? nw - ii : ns; - int midx = 4 * ii + 2; std::vector clusters_results; int clusters_counter = ns / test_params.cluster_size; clusters_results.resize(clusters_counter); -- cgit v1.2.3 From 38639f229ddc3618eaed9591135538aff976fdca Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 4 Aug 2022 15:05:10 +0100 Subject: Fix test_select verification failure reporting (#1462) When verification of the computed result fails, the test would still report as "passed". This is because `s_test_fail` is only written to and never read. Fix the immediate issue by returning a failure value and incrementing `gFailCount` if any error was detected. The error handling can be improved further, but I'm leaving that out of the scope of this fix. Fixes https://github.com/KhronosGroup/OpenCL-CTS/issues/1445 Signed-off-by: Sven van Haastregt --- test_conformance/select/test_select.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index 35f154ac..e659206e 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -79,7 +79,6 @@ static int s_wimpy_reduction_factor = 256; // sub tests which is for each individual test. 
The following // tracks the subtests int s_test_cnt = 0; -int s_test_fail = 0; //----------------------------------------- // Static helper functions @@ -297,6 +296,7 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont static int doTest(cl_command_queue queue, cl_context context, Type stype, Type cmptype, cl_device_id device) { int err = CL_SUCCESS; + int s_test_fail = 0; MTdata d; const size_t element_count[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 }; cl_mem src1 = NULL; @@ -468,6 +468,11 @@ exit: clReleaseProgram(programs[vecsize]); } ++s_test_cnt; + if (s_test_fail) + { + err = TEST_FAIL; + gFailCount++; + } return err; } -- cgit v1.2.3 From d647529fec1a9f6d28f07a2a85cae345aacb2dd6 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 16 Aug 2022 14:42:33 +0100 Subject: [NFC] Fix missing `double_double.lo` initializer (#1466) Fixes a missing-field-initializers warning. The original intent was most likely to initialize both fields (similar to other functions in this file), but a `,` was missed. 
Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/reference_math.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index 16db3d67..a0a3d65d 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -2321,7 +2321,7 @@ static inline double_double accum_d(double_double a, double b) static inline double_double add_dd(double_double a, double_double b) { - double_double r = { -0.0 - 0.0 }; + double_double r = { -0.0, -0.0 }; if (isinf(a.hi) || isinf(b.hi) || isnan(a.hi) || isnan(b.hi) || 0.0 == a.hi || 0.0 == b.hi) -- cgit v1.2.3 From e52a97e4e9800ccf34678d915281b22524461ea8 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 23 Aug 2022 17:57:05 +0100 Subject: [NFC] Use Unix-style line endings (#1468) Use the same line ending style across all source files. Signed-off-by: Sven van Haastregt --- ...est_cl_khr_spirv_no_integer_wrap_decoration.cpp | 438 ++++++++++----------- 1 file changed, 219 insertions(+), 219 deletions(-) diff --git a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp index 9e1789c2..6a4982eb 100644 --- a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp +++ b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp @@ -1,219 +1,219 @@ -/****************************************************************** -Copyright (c) 2018 The Khronos Group Inc. All Rights Reserved. - -This code is protected by copyright laws and contains material proprietary to the Khronos Group, Inc. 
-This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not be disclosed in whole or in part to -third parties, and may not be reproduced, republished, distributed, transmitted, displayed, -broadcast or otherwise exploited in any manner without the express prior written permission -of Khronos Group. The receipt or possession of this code does not convey any rights to reproduce, -disclose, or distribute its contents, or to manufacture, use, or sell anything that it may describe, -in whole or in part other than under the terms of the Khronos Adopters Agreement -or Khronos Conformance Test Source License Agreement as executed between Khronos and the recipient. -******************************************************************/ - -#include "testBase.h" -#include "types.hpp" - -#include -#include -#include - - -template -int test_ext_cl_khr_spirv_no_integer_wrap_decoration(cl_device_id deviceID, - cl_context context, - cl_command_queue queue, - const char *spvName, - const char *funcName, - const char *Tname) -{ - - cl_int err = CL_SUCCESS; - const int num = 10; - std::vector h_lhs(num); - std::vector h_rhs(num); - std::vector expected_results(num); - std::vector h_ref(num); - if (!is_extension_available(deviceID, "cl_khr_spirv_no_integer_wrap_decoration")) { - log_info("Extension cl_khr_spirv_no_integer_wrap_decoration not supported; skipping tests.\n"); - return 0; - } - - /*Test with some values that do not cause overflow*/ - if (std::is_signed::value == true) { - h_lhs.push_back((T)-25000); - h_lhs.push_back((T)-3333); - h_lhs.push_back((T)-7); - h_lhs.push_back((T)-1); - h_lhs.push_back(0); - h_lhs.push_back(1); - h_lhs.push_back(1024); - h_lhs.push_back(2048); - h_lhs.push_back(4094); - h_lhs.push_back(10000); - } else { - h_lhs.push_back(0); - h_lhs.push_back(1); - h_lhs.push_back(3); - h_lhs.push_back(5); - h_lhs.push_back(10); - h_lhs.push_back(100); - h_lhs.push_back(1024); - h_lhs.push_back(2048); - h_lhs.push_back(4094); - h_lhs.push_back(52888); - } - - 
h_rhs.push_back(0); - h_rhs.push_back(1); - h_rhs.push_back(2); - h_rhs.push_back(3); - h_rhs.push_back(4); - h_rhs.push_back(5); - h_rhs.push_back(6); - h_rhs.push_back(7); - h_rhs.push_back(8); - h_rhs.push_back(9); - size_t bytes = num * sizeof(T); - - clMemWrapper lhs = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err); - SPIRV_CHECK_ERROR(err, "Failed to create lhs buffer"); - - err = clEnqueueWriteBuffer(queue, lhs, CL_TRUE, 0, bytes, &h_lhs[0], 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to copy to lhs buffer"); - - clMemWrapper rhs = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err); - SPIRV_CHECK_ERROR(err, "Failed to create rhs buffer"); - - err = clEnqueueWriteBuffer(queue, rhs, CL_TRUE, 0, bytes, &h_rhs[0], 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to copy to rhs buffer"); - - std::string kernelStr; - - { - std::stringstream kernelStream; - kernelStream << "#define spirv_fadd(a, b) (a) + (b) \n"; - kernelStream << "#define spirv_fsub(a, b) (a) - (b) \n"; - kernelStream << "#define spirv_fmul(a, b) (a) * (b) \n"; - kernelStream << "#define spirv_fshiftleft(a, b) (a) << (b) \n"; - kernelStream << "#define spirv_fnegate(a, b) (-a) \n"; - - kernelStream << "#define T " << Tname << "\n"; - kernelStream << "#define FUNC spirv_" << funcName << "\n"; - kernelStream << "__kernel void fmath_cl(__global T *out, \n"; - kernelStream << "const __global T *lhs, const __global T *rhs) \n"; - kernelStream << "{ \n"; - kernelStream << " int id = get_global_id(0); \n"; - kernelStream << " out[id] = FUNC(lhs[id], rhs[id]); \n"; - kernelStream << "} \n"; - kernelStr = kernelStream.str(); - } - - size_t kernelLen = kernelStr.size(); - const char *kernelBuf = kernelStr.c_str(); - - for (int i = 0; i < num; i++) { - if (std::string(funcName) == std::string("fadd")) { - expected_results[i] = h_lhs[i] + h_rhs[i]; - } else if (std::string(funcName) == std::string("fsub")) { - expected_results[i] = h_lhs[i] - h_rhs[i]; - } else if 
(std::string(funcName) == std::string("fmul")) { - expected_results[i] = h_lhs[i] * h_rhs[i]; - } else if (std::string(funcName) == std::string("fshiftleft")) { - expected_results[i] = h_lhs[i] << h_rhs[i]; - } else if (std::string(funcName) == std::string("fnegate")) { - expected_results[i] = 0 - h_lhs[i]; - } - } - - { - // Run the cl kernel for reference results - clProgramWrapper prog; - clKernelWrapper kernel; - err = create_single_kernel_helper(context, &prog, &kernel, 1, - &kernelBuf, "fmath_cl"); - SPIRV_CHECK_ERROR(err, "Failed to create cl kernel"); - - clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err); - SPIRV_CHECK_ERROR(err, "Failed to create ref buffer"); - - err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &ref); - SPIRV_CHECK_ERROR(err, "Failed to set arg 0"); - - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs); - SPIRV_CHECK_ERROR(err, "Failed to set arg 1"); - - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs); - SPIRV_CHECK_ERROR(err, "Failed to set arg 2"); - - size_t global = num; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel"); - - err = clEnqueueReadBuffer(queue, ref, CL_TRUE, 0, bytes, &h_ref[0], 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to read from ref"); - } - - for (int i = 0; i < num; i++) { - if (expected_results[i] != h_ref[i]) { - log_error("Values do not match at index %d expected = %d got = %d\n", i, expected_results[i], h_ref[i]); - return -1; - } - } - - clProgramWrapper prog; - err = get_program_with_il(prog, deviceID, context, spvName); - SPIRV_CHECK_ERROR(err, "Failed to build program"); - - clKernelWrapper kernel = clCreateKernel(prog, "fmath_cl", &err); - SPIRV_CHECK_ERROR(err, "Failed to create spv kernel"); - - clMemWrapper res = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err); - SPIRV_CHECK_ERROR(err, "Failed to create res buffer"); - - err = clSetKernelArg(kernel, 0, 
sizeof(cl_mem), &res); - SPIRV_CHECK_ERROR(err, "Failed to set arg 0"); - - err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs); - SPIRV_CHECK_ERROR(err, "Failed to set arg 1"); - - err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs); - SPIRV_CHECK_ERROR(err, "Failed to set arg 2"); - - size_t global = num; - err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel"); - - std::vector h_res(num); - err = clEnqueueReadBuffer(queue, res, CL_TRUE, 0, bytes, &h_res[0], 0, NULL, NULL); - SPIRV_CHECK_ERROR(err, "Failed to read from ref"); - - for (int i = 0; i < num; i++) { - if (expected_results[i] != h_res[i]) { - log_error("Values do not match at location %d expected = %d got = %d\n", i, expected_results[i], h_res[i]); - return -1; - } - } - - return 0; -} - -#define TEST_FMATH_FUNC(TYPE, FUNC) \ - TEST_SPIRV_FUNC(ext_cl_khr_spirv_no_integer_wrap_decoration_##FUNC##_##TYPE) \ - { \ - return test_ext_cl_khr_spirv_no_integer_wrap_decoration(deviceID, context, queue, \ - "ext_cl_khr_spirv_no_integer_wrap_decoration_"#FUNC"_"#TYPE, \ - #FUNC, \ - #TYPE \ - ); \ - } - -TEST_FMATH_FUNC(int, fadd) -TEST_FMATH_FUNC(int, fsub) -TEST_FMATH_FUNC(int, fmul) -TEST_FMATH_FUNC(int, fshiftleft) -TEST_FMATH_FUNC(int, fnegate) -TEST_FMATH_FUNC(uint, fadd) -TEST_FMATH_FUNC(uint, fsub) -TEST_FMATH_FUNC(uint, fmul) -TEST_FMATH_FUNC(uint, fshiftleft) \ No newline at end of file +/****************************************************************** +Copyright (c) 2018 The Khronos Group Inc. All Rights Reserved. + +This code is protected by copyright laws and contains material proprietary to the Khronos Group, Inc. +This is UNPUBLISHED PROPRIETARY SOURCE CODE that may not be disclosed in whole or in part to +third parties, and may not be reproduced, republished, distributed, transmitted, displayed, +broadcast or otherwise exploited in any manner without the express prior written permission +of Khronos Group. 
The receipt or possession of this code does not convey any rights to reproduce, +disclose, or distribute its contents, or to manufacture, use, or sell anything that it may describe, +in whole or in part other than under the terms of the Khronos Adopters Agreement +or Khronos Conformance Test Source License Agreement as executed between Khronos and the recipient. +******************************************************************/ + +#include "testBase.h" +#include "types.hpp" + +#include +#include +#include + + +template +int test_ext_cl_khr_spirv_no_integer_wrap_decoration(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + const char *spvName, + const char *funcName, + const char *Tname) +{ + + cl_int err = CL_SUCCESS; + const int num = 10; + std::vector h_lhs(num); + std::vector h_rhs(num); + std::vector expected_results(num); + std::vector h_ref(num); + if (!is_extension_available(deviceID, "cl_khr_spirv_no_integer_wrap_decoration")) { + log_info("Extension cl_khr_spirv_no_integer_wrap_decoration not supported; skipping tests.\n"); + return 0; + } + + /*Test with some values that do not cause overflow*/ + if (std::is_signed::value == true) { + h_lhs.push_back((T)-25000); + h_lhs.push_back((T)-3333); + h_lhs.push_back((T)-7); + h_lhs.push_back((T)-1); + h_lhs.push_back(0); + h_lhs.push_back(1); + h_lhs.push_back(1024); + h_lhs.push_back(2048); + h_lhs.push_back(4094); + h_lhs.push_back(10000); + } else { + h_lhs.push_back(0); + h_lhs.push_back(1); + h_lhs.push_back(3); + h_lhs.push_back(5); + h_lhs.push_back(10); + h_lhs.push_back(100); + h_lhs.push_back(1024); + h_lhs.push_back(2048); + h_lhs.push_back(4094); + h_lhs.push_back(52888); + } + + h_rhs.push_back(0); + h_rhs.push_back(1); + h_rhs.push_back(2); + h_rhs.push_back(3); + h_rhs.push_back(4); + h_rhs.push_back(5); + h_rhs.push_back(6); + h_rhs.push_back(7); + h_rhs.push_back(8); + h_rhs.push_back(9); + size_t bytes = num * sizeof(T); + + clMemWrapper lhs = clCreateBuffer(context, 
CL_MEM_READ_ONLY, bytes, NULL, &err); + SPIRV_CHECK_ERROR(err, "Failed to create lhs buffer"); + + err = clEnqueueWriteBuffer(queue, lhs, CL_TRUE, 0, bytes, &h_lhs[0], 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to copy to lhs buffer"); + + clMemWrapper rhs = clCreateBuffer(context, CL_MEM_READ_ONLY, bytes, NULL, &err); + SPIRV_CHECK_ERROR(err, "Failed to create rhs buffer"); + + err = clEnqueueWriteBuffer(queue, rhs, CL_TRUE, 0, bytes, &h_rhs[0], 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to copy to rhs buffer"); + + std::string kernelStr; + + { + std::stringstream kernelStream; + kernelStream << "#define spirv_fadd(a, b) (a) + (b) \n"; + kernelStream << "#define spirv_fsub(a, b) (a) - (b) \n"; + kernelStream << "#define spirv_fmul(a, b) (a) * (b) \n"; + kernelStream << "#define spirv_fshiftleft(a, b) (a) << (b) \n"; + kernelStream << "#define spirv_fnegate(a, b) (-a) \n"; + + kernelStream << "#define T " << Tname << "\n"; + kernelStream << "#define FUNC spirv_" << funcName << "\n"; + kernelStream << "__kernel void fmath_cl(__global T *out, \n"; + kernelStream << "const __global T *lhs, const __global T *rhs) \n"; + kernelStream << "{ \n"; + kernelStream << " int id = get_global_id(0); \n"; + kernelStream << " out[id] = FUNC(lhs[id], rhs[id]); \n"; + kernelStream << "} \n"; + kernelStr = kernelStream.str(); + } + + size_t kernelLen = kernelStr.size(); + const char *kernelBuf = kernelStr.c_str(); + + for (int i = 0; i < num; i++) { + if (std::string(funcName) == std::string("fadd")) { + expected_results[i] = h_lhs[i] + h_rhs[i]; + } else if (std::string(funcName) == std::string("fsub")) { + expected_results[i] = h_lhs[i] - h_rhs[i]; + } else if (std::string(funcName) == std::string("fmul")) { + expected_results[i] = h_lhs[i] * h_rhs[i]; + } else if (std::string(funcName) == std::string("fshiftleft")) { + expected_results[i] = h_lhs[i] << h_rhs[i]; + } else if (std::string(funcName) == std::string("fnegate")) { + expected_results[i] = 0 - h_lhs[i]; + } + 
} + + { + // Run the cl kernel for reference results + clProgramWrapper prog; + clKernelWrapper kernel; + err = create_single_kernel_helper(context, &prog, &kernel, 1, + &kernelBuf, "fmath_cl"); + SPIRV_CHECK_ERROR(err, "Failed to create cl kernel"); + + clMemWrapper ref = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err); + SPIRV_CHECK_ERROR(err, "Failed to create ref buffer"); + + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &ref); + SPIRV_CHECK_ERROR(err, "Failed to set arg 0"); + + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs); + SPIRV_CHECK_ERROR(err, "Failed to set arg 1"); + + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs); + SPIRV_CHECK_ERROR(err, "Failed to set arg 2"); + + size_t global = num; + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel"); + + err = clEnqueueReadBuffer(queue, ref, CL_TRUE, 0, bytes, &h_ref[0], 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to read from ref"); + } + + for (int i = 0; i < num; i++) { + if (expected_results[i] != h_ref[i]) { + log_error("Values do not match at index %d expected = %d got = %d\n", i, expected_results[i], h_ref[i]); + return -1; + } + } + + clProgramWrapper prog; + err = get_program_with_il(prog, deviceID, context, spvName); + SPIRV_CHECK_ERROR(err, "Failed to build program"); + + clKernelWrapper kernel = clCreateKernel(prog, "fmath_cl", &err); + SPIRV_CHECK_ERROR(err, "Failed to create spv kernel"); + + clMemWrapper res = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &err); + SPIRV_CHECK_ERROR(err, "Failed to create res buffer"); + + err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &res); + SPIRV_CHECK_ERROR(err, "Failed to set arg 0"); + + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &lhs); + SPIRV_CHECK_ERROR(err, "Failed to set arg 1"); + + err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &rhs); + SPIRV_CHECK_ERROR(err, "Failed to set arg 2"); + + size_t global = num; + 
err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to enqueue cl kernel"); + + std::vector h_res(num); + err = clEnqueueReadBuffer(queue, res, CL_TRUE, 0, bytes, &h_res[0], 0, NULL, NULL); + SPIRV_CHECK_ERROR(err, "Failed to read from ref"); + + for (int i = 0; i < num; i++) { + if (expected_results[i] != h_res[i]) { + log_error("Values do not match at location %d expected = %d got = %d\n", i, expected_results[i], h_res[i]); + return -1; + } + } + + return 0; +} + +#define TEST_FMATH_FUNC(TYPE, FUNC) \ + TEST_SPIRV_FUNC(ext_cl_khr_spirv_no_integer_wrap_decoration_##FUNC##_##TYPE) \ + { \ + return test_ext_cl_khr_spirv_no_integer_wrap_decoration(deviceID, context, queue, \ + "ext_cl_khr_spirv_no_integer_wrap_decoration_"#FUNC"_"#TYPE, \ + #FUNC, \ + #TYPE \ + ); \ + } + +TEST_FMATH_FUNC(int, fadd) +TEST_FMATH_FUNC(int, fsub) +TEST_FMATH_FUNC(int, fmul) +TEST_FMATH_FUNC(int, fshiftleft) +TEST_FMATH_FUNC(int, fnegate) +TEST_FMATH_FUNC(uint, fadd) +TEST_FMATH_FUNC(uint, fsub) +TEST_FMATH_FUNC(uint, fmul) +TEST_FMATH_FUNC(uint, fshiftleft) -- cgit v1.2.3 From 9666ca3c70192002c89130913205458db0a3d334 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 23 Aug 2022 18:02:33 +0100 Subject: [NFC] Fix sign-compare warnings in math_brute_force (#1467) Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/main.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index d1d146a1..45b6e97d 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -129,8 +129,9 @@ static int doTest(const char *name) const Func *const temp_func = functionList + i; if (strcmp(temp_func->name, name) == 0) { - if ((gStartTestNumber != -1 && i < gStartTestNumber) - || i > gEndTestNumber) + if ((gStartTestNumber != -1 + && static_cast(i) < 
gStartTestNumber) + || static_cast(i) > gEndTestNumber) { vlog("Skipping function #%d\n", i); return 0; @@ -524,7 +525,7 @@ static int ParseArgs(int argc, const char **argv) static void PrintFunctions(void) { vlog("\nMath function names:\n"); - for (int i = 0; i < functionListCount; i++) + for (size_t i = 0; i < functionListCount; i++) { vlog("\t%s\n", functionList[i].name); } -- cgit v1.2.3 From c82dabd4bbe7c61f5251488e471f9938ed20630d Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 24 Aug 2022 10:31:32 +0100 Subject: Use clCommandQueueWrapper in math_brute_force (#1463) Simplify code by avoiding manual resource management. This commit only modifies tests that use one queue per thread. The other unmodified tests are single-threaded and use the global `gQueue`. Original patch by Marco Antognini. Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 5 +++-- test_conformance/math_brute_force/binary_float.cpp | 5 +++-- test_conformance/math_brute_force/binary_i_double.cpp | 5 +++-- test_conformance/math_brute_force/binary_i_float.cpp | 5 +++-- test_conformance/math_brute_force/binary_operator_double.cpp | 5 +++-- test_conformance/math_brute_force/binary_operator_float.cpp | 5 +++-- test_conformance/math_brute_force/macro_binary_double.cpp | 5 +++-- test_conformance/math_brute_force/macro_binary_float.cpp | 5 +++-- test_conformance/math_brute_force/macro_unary_double.cpp | 5 +++-- test_conformance/math_brute_force/macro_unary_float.cpp | 5 +++-- test_conformance/math_brute_force/unary_double.cpp | 5 +++-- test_conformance/math_brute_force/unary_float.cpp | 5 +++-- 12 files changed, 36 insertions(+), 24 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 034b325a..490c17b6 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -133,7 
+133,9 @@ struct ThreadInfo double maxErrorValue2; // position of the max error value (param 2). Init // to 0. MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -795,7 +797,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 7abaa0e4..01082bc1 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -131,7 +131,9 @@ struct ThreadInfo double maxErrorValue2; // position of the max error value (param 2). Init // to 0. MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -952,7 +954,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index bba93617..def0bd41 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -132,7 +132,9 @@ struct ThreadInfo cl_int maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -717,7 +719,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 4821830c..ed207098 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -130,7 +130,9 @@ struct ThreadInfo cl_int maxErrorValue2; // position of the max error value (param 2). Init // to 0. MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -710,7 +712,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 09c560e9..992df276 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -132,7 +132,9 @@ struct ThreadInfo double maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -762,7 +764,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index f2e57bc1..a555beaa 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -130,7 +130,9 @@ struct ThreadInfo double maxErrorValue2; // position of the max error value (param 2). Init // to 0. MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -889,7 +891,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index 88b0f86c..fb28d823 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -126,7 +126,9 @@ struct ThreadInfo cl_mem inBuf2; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -707,7 +709,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = 
gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 6199dd1a..fd93e2e6 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -124,7 +124,9 @@ struct ThreadInfo cl_mem inBuf2; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -696,7 +698,6 @@ exit: clReleaseMemObject(threadInfo.inBuf2); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index b7fb8a96..2365a195 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -118,7 +118,9 @@ struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -478,7 +480,6 @@ exit: clReleaseMemObject(threadInfo.inBuf); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp 
index e4c22369..adc6c3ec 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -117,7 +117,9 @@ struct ThreadInfo { cl_mem inBuf; // input buffer for the thread cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -492,7 +494,6 @@ exit: clReleaseMemObject(threadInfo.inBuf); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 76bcfd46..19402283 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -120,7 +120,9 @@ struct ThreadInfo cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. 
- cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -516,7 +518,6 @@ exit: clReleaseMemObject(threadInfo.inBuf); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index d310054d..5a9a7361 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -118,7 +118,9 @@ struct ThreadInfo cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. - cl_command_queue tQueue; // per thread command queue to improve performance + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; }; struct TestInfo @@ -693,7 +695,6 @@ exit: clReleaseMemObject(threadInfo.inBuf); for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) clReleaseMemObject(threadInfo.outBuf[j]); - clReleaseCommandQueue(threadInfo.tQueue); } return error; -- cgit v1.2.3 From afe4ef8b8f63f13c0cb3a6d7eaff5dc761c3d2b1 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 24 Aug 2022 12:05:01 +0100 Subject: Fix test skipping in math_brute_force (#1475) Commit 9666ca3c ("[NFC] Fix sign-compare warnings in math_brute_force (#1467)", 2022-08-23) inadvertently changed the semantics of the if condition. The `i > gEndTestNumber` comparison was relying on `gEndTestNumber` being promoted to unsigned. When casting `i` to `int32_t`, this promotion no longer happens and as a result any tests given on the command line were being skipped. 
Use an unsigned type for `gStartTestNumber` and `gEndTestNumber` to eliminate the casts and any implicit conversions between signed and unsigned types. Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/main.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 45b6e97d..8cebff9d 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -58,8 +58,8 @@ static char appName[MAXPATHLEN] = ""; cl_device_id gDevice = NULL; cl_context gContext = NULL; cl_command_queue gQueue = NULL; -static int32_t gStartTestNumber = -1; -static int32_t gEndTestNumber = -1; +static size_t gStartTestNumber = ~0u; +static size_t gEndTestNumber = ~0u; int gSkipCorrectnessTesting = 0; static int gStopOnError = 0; static bool gSkipRestOfTests; @@ -129,9 +129,8 @@ static int doTest(const char *name) const Func *const temp_func = functionList + i; if (strcmp(temp_func->name, name) == 0) { - if ((gStartTestNumber != -1 - && static_cast(i) < gStartTestNumber) - || static_cast(i) > gEndTestNumber) + if ((gStartTestNumber != ~0u && i < gStartTestNumber) + || i > gEndTestNumber) { vlog("Skipping function #%d\n", i); return 0; @@ -468,7 +467,7 @@ static int ParseArgs(int argc, const char **argv) long number = strtol(arg, &t, 0); if (t != arg) { - if (-1 == gStartTestNumber) + if (~0u == gStartTestNumber) gStartTestNumber = (int32_t)number; else gEndTestNumber = gStartTestNumber + (int32_t)number; -- cgit v1.2.3 From f4eb852b6d376afb827da4999cdfd5e0376b6a40 Mon Sep 17 00:00:00 2001 From: stoneforestwhu Date: Wed, 31 Aug 2022 00:47:15 +0800 Subject: support format CL_ABGR (#1474) * support format CL_ABGR add code to handle format CL_ABGR * Update imageHelpers.h * fix format --- test_common/harness/imageHelpers.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test_common/harness/imageHelpers.h 
b/test_common/harness/imageHelpers.h index 2cc8e68e..f8ae4fb9 100644 --- a/test_common/harness/imageHelpers.h +++ b/test_common/harness/imageHelpers.h @@ -482,6 +482,13 @@ void read_image_pixel(void *imageData, image_descriptor *imageInfo, int x, outData[2] = tempData[3]; outData[3] = tempData[0]; } + else if (format->image_channel_order == CL_ABGR) + { + outData[0] = tempData[3]; + outData[1] = tempData[2]; + outData[2] = tempData[1]; + outData[3] = tempData[0]; + } else if ((format->image_channel_order == CL_BGRA) || (format->image_channel_order == CL_sBGRA)) { -- cgit v1.2.3 From 8f5a2f0ae8b083665773281e01ff8e87e286b671 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 30 Aug 2022 17:54:50 +0100 Subject: Initial command-buffer extension tests (#1368) * Initial command-buffer tests Introduce some basic testing of the [cl_khr_command_buffer](https://www.khronos.org/registry/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_command_buffer) extension. This is intended as a starting point from which we can iteratively build up tests for the extension collaboratively. * Move tests into derived classes * Move tests from methods into derived classes implementing a `Run()` interface. * Fix memory leak when command_buffer isn't freed when a test is skipped. * Print correct error code for `CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR` * Pass `nullptr` for queue parameter to command recording entry-points * Define command-buffer type wrapper Other OpenCL object have a wrapper to reference count their use and free the wrapped object. The command-buffer object can't use the generic type wrappers which are templated on the appropriate release/retain function, as the release/retain functions are queried at runtime. Instead, define our own command-buffer wrapper class where a base object is passed on construction which contains function pointers to the release/retain functions that can be used in the wrapper. 
* Use create_single_kernel_helper_create_program Use `create_single_kernel_helper_create_program` rather than hardcoding `clCreateProgramWithSource` to allow for other types of program input. Also fix bug using wrong enum for passing properties on command-buffer creation, should be `CL_COMMAND_BUFFER_FLAGS_KHR` * Add out-of-order command-buffer test Introduce a basic test for checking sync-point use with out-of-order command-buffers. This also includes better checking of required queue properties. --- test_conformance/extensions/CMakeLists.txt | 1 + .../cl_khr_command_buffer/CMakeLists.txt | 8 + .../cl_khr_command_buffer/basic_command_buffer.cpp | 588 +++++++++++++++++++++ .../command_buffer_test_base.h | 177 +++++++ .../extensions/cl_khr_command_buffer/main.cpp | 35 ++ .../extensions/cl_khr_command_buffer/procs.h | 35 ++ 6 files changed, 844 insertions(+) create mode 100644 test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt create mode 100644 test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp create mode 100644 test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h create mode 100644 test_conformance/extensions/cl_khr_command_buffer/main.cpp create mode 100644 test_conformance/extensions/cl_khr_command_buffer/procs.h diff --git a/test_conformance/extensions/CMakeLists.txt b/test_conformance/extensions/CMakeLists.txt index 53d77ee5..d95d29aa 100644 --- a/test_conformance/extensions/CMakeLists.txt +++ b/test_conformance/extensions/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory( cl_ext_cxx_for_opencl ) +add_subdirectory( cl_khr_command_buffer ) add_subdirectory( cl_khr_dx9_media_sharing ) diff --git a/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt new file mode 100644 index 00000000..ac259f6d --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/CMakeLists.txt @@ -0,0 +1,8 @@ +set(MODULE_NAME 
CL_KHR_COMMAND_BUFFER) + +set(${MODULE_NAME}_SOURCES + main.cpp + basic_command_buffer.cpp +) + +include(../../CMakeCommon.txt) diff --git a/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp new file mode 100644 index 00000000..62a02d83 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/basic_command_buffer.cpp @@ -0,0 +1,588 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "command_buffer_test_base.h" +#include "procs.h" +#include "harness/typeWrappers.h" + +#include +#include +#include + +#define CHECK_VERIFICATION_ERROR(reference, result, index) \ + { \ + if (reference != result) \ + { \ + log_error("Expected %d was %d at index %u\n", reference, result, \ + index); \ + return TEST_FAIL; \ + } \ + } + +namespace { + +// Helper test fixture for constructing OpenCL objects used in testing +// a variety of simple command-buffer enqueue scenarios. 
+struct BasicCommandBufferTest : CommandBufferTestBase +{ + + BasicCommandBufferTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : CommandBufferTestBase(device), context(context), queue(queue), + command_buffer(this), simultaneous_use(false), + out_of_order_support(false), num_elements(0) + {} + + virtual bool Skip() + { + cl_command_queue_properties required_properties; + cl_int error = clGetDeviceInfo( + device, CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR, + sizeof(required_properties), &required_properties, NULL); + test_error(error, + "Unable to query " + "CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR"); + + cl_command_queue_properties queue_properties; + + error = clGetCommandQueueInfo(queue, CL_QUEUE_PROPERTIES, + sizeof(queue_properties), + &queue_properties, NULL); + test_error(error, "Unable to query CL_QUEUE_PROPERTIES"); + + // Skip if queue properties don't contain those required + return required_properties != (required_properties & queue_properties); + } + + virtual cl_int SetUp(int elements) + { + cl_int error = init_extension_functions(); + if (error != CL_SUCCESS) + { + return error; + } + + // Query if device supports simultaneous use + cl_device_command_buffer_capabilities_khr capabilities; + error = + clGetDeviceInfo(device, CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR, + sizeof(capabilities), &capabilities, NULL); + test_error(error, + "Unable to query CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR"); + simultaneous_use = + capabilities & CL_COMMAND_BUFFER_CAPABILITY_SIMULTANEOUS_USE_KHR; + out_of_order_support = + capabilities & CL_COMMAND_BUFFER_CAPABILITY_OUT_OF_ORDER_KHR; + + if (elements <= 0) + { + return CL_INVALID_VALUE; + } + num_elements = static_cast(elements); + + // Kernel performs a parallel copy from an input buffer to output buffer + // is created. 
+ const char *kernel_str = + R"( + __kernel void copy(__global int* in, __global int* out) { + size_t id = get_global_id(0); + out[id] = in[id]; + })"; + + error = create_single_kernel_helper_create_program(context, &program, 1, + &kernel_str); + test_error(error, "Failed to create program with source"); + + error = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr); + test_error(error, "Failed to build program"); + + in_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, + sizeof(cl_int) * num_elements, nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + out_mem = + clCreateBuffer(context, CL_MEM_WRITE_ONLY, + sizeof(cl_int) * num_elements, nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + kernel = clCreateKernel(program, "copy", &error); + test_error(error, "Failed to create copy kernel"); + + error = clSetKernelArg(kernel, 0, sizeof(in_mem), &in_mem); + test_error(error, "clSetKernelArg failed"); + + error = clSetKernelArg(kernel, 1, sizeof(out_mem), &out_mem); + test_error(error, "clSetKernelArg failed"); + + if (simultaneous_use) + { + cl_command_buffer_properties_khr properties[3] = { + CL_COMMAND_BUFFER_FLAGS_KHR, + CL_COMMAND_BUFFER_SIMULTANEOUS_USE_KHR, 0 + }; + command_buffer = + clCreateCommandBufferKHR(1, &queue, properties, &error); + } + else + { + command_buffer = + clCreateCommandBufferKHR(1, &queue, nullptr, &error); + } + test_error(error, "clCreateCommandBufferKHR failed"); + + return CL_SUCCESS; + } + + // Test body returning an OpenCL error code + virtual cl_int Run() = 0; + + +protected: + size_t data_size() const { return num_elements * sizeof(cl_int); } + + cl_context context; + cl_command_queue queue; + clCommandBufferWrapper command_buffer; + clProgramWrapper program; + clKernelWrapper kernel; + clMemWrapper in_mem, out_mem; + size_t num_elements; + + // Device support query results + bool simultaneous_use; + bool out_of_order_support; +}; + +// Test enqueuing a command-buffer containing a single 
NDRange command once +struct BasicEnqueueTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + const cl_int pattern = 42; + error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + } + + return CL_SUCCESS; + } +}; + +// Test enqueuing a command-buffer containing multiple command, including +// operations other than NDRange kernel execution. 
+struct MixedCommandsTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error; + const size_t iterations = 4; + clMemWrapper result_mem = + clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(cl_int) * iterations, nullptr, &error); + test_error(error, "clCreateBuffer failed"); + + const cl_int pattern_base = 42; + for (size_t i = 0; i < iterations; i++) + { + const cl_int pattern = pattern_base + i; + cl_int error = clCommandFillBufferKHR( + command_buffer, nullptr, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + const size_t result_offset = i * sizeof(cl_int); + error = clCommandCopyBufferKHR( + command_buffer, nullptr, out_mem, result_mem, 0, result_offset, + sizeof(cl_int), 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandCopyBufferKHR failed"); + } + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector result_data(num_elements); + error = clEnqueueReadBuffer(queue, result_mem, CL_TRUE, 0, + iterations * sizeof(cl_int), + result_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < iterations; i++) + { + const cl_int ref = pattern_base + i; + CHECK_VERIFICATION_ERROR(ref, result_data[i], i); + } + + return CL_SUCCESS; + } +}; + +// Test enqueueing a command-buffer blocked on a user-event +struct UserEventTest : public BasicCommandBufferTest +{ + using 
BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + clEventWrapper user_event = clCreateUserEvent(context, &error); + test_error(error, "clCreateUserEvent failed"); + + const cl_int pattern = 42; + error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 1, + &user_event, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(), + output_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + error = clSetUserEventStatus(user_event, CL_COMPLETE); + test_error(error, "clSetUserEventStatus failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + } + + return CL_SUCCESS; + } +}; + +// Test flushing the command-queue between command-buffer enqueues +struct ExplicitFlushTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + const cl_int pattern_A = 42; + error = 
clEnqueueFillBuffer(queue, in_mem, &pattern_A, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clFlush(queue); + test_error(error, "clFlush failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data_A(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(), + output_data_A.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + const cl_int pattern_B = 0xA; + error = clEnqueueFillBuffer(queue, in_mem, &pattern_B, sizeof(cl_int), + 0, data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clFlush(queue); + test_error(error, "clFlush failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clFlush(queue); + test_error(error, "clFlush failed"); + + std::vector output_data_B(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_FALSE, 0, data_size(), + output_data_B.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + error = clFinish(queue); + test_error(error, "clFinish failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern_A, output_data_A[i], i); + + CHECK_VERIFICATION_ERROR(pattern_B, output_data_B[i], i); + } + return CL_SUCCESS; + } + + bool Skip() override + { + return !simultaneous_use || BasicCommandBufferTest::Skip(); + } +}; + +// Test enqueueing a command-buffer twice separated by another enqueue operation +struct InterleavedEnqueueTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + + cl_int Run() override + { + cl_int error = clCommandNDRangeKernelKHR( + command_buffer, nullptr, nullptr, kernel, 1, nullptr, &num_elements, + 
nullptr, 0, nullptr, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + cl_int pattern = 42; + error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + pattern = 0xABCD; + error = clEnqueueFillBuffer(queue, in_mem, &pattern, sizeof(cl_int), 0, + data_size(), 0, nullptr, nullptr); + test_error(error, "clEnqueueFillBuffer failed"); + + error = clEnqueueCommandBufferKHR(0, nullptr, command_buffer, 0, + nullptr, nullptr); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + error = clEnqueueCopyBuffer(queue, in_mem, out_mem, 0, 0, data_size(), + 0, nullptr, nullptr); + test_error(error, "clEnqueueCopyBuffer failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(queue, out_mem, CL_TRUE, 0, data_size(), + output_data.data(), 0, nullptr, nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + } + + return CL_SUCCESS; + } + + bool Skip() override + { + return !simultaneous_use || BasicCommandBufferTest::Skip(); + } +}; + +// Test sync-points with an out-of-order command-buffer +struct OutOfOrderTest : public BasicCommandBufferTest +{ + using BasicCommandBufferTest::BasicCommandBufferTest; + OutOfOrderTest(cl_device_id device, cl_context context, + cl_command_queue queue) + : BasicCommandBufferTest(device, context, queue), + out_of_order_command_buffer(this), out_of_order_queue(nullptr), + event(nullptr) + {} + + cl_int Run() override + { + cl_sync_point_khr sync_points[2]; + + const cl_int pattern = 42; + cl_int error = + 
clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, in_mem, + &pattern, sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[0], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + const cl_int overwritten_pattern = 0xACDC; + error = clCommandFillBufferKHR(out_of_order_command_buffer, nullptr, + out_mem, &overwritten_pattern, + sizeof(cl_int), 0, data_size(), 0, + nullptr, &sync_points[1], nullptr); + test_error(error, "clCommandFillBufferKHR failed"); + + error = clCommandNDRangeKernelKHR( + out_of_order_command_buffer, nullptr, nullptr, kernel, 1, nullptr, + &num_elements, nullptr, 2, sync_points, nullptr, nullptr); + test_error(error, "clCommandNDRangeKernelKHR failed"); + + error = clFinalizeCommandBufferKHR(out_of_order_command_buffer); + test_error(error, "clFinalizeCommandBufferKHR failed"); + + error = clEnqueueCommandBufferKHR( + 0, nullptr, out_of_order_command_buffer, 0, nullptr, &event); + test_error(error, "clEnqueueCommandBufferKHR failed"); + + std::vector output_data(num_elements); + error = clEnqueueReadBuffer(out_of_order_queue, out_mem, CL_TRUE, 0, + data_size(), output_data.data(), 1, &event, + nullptr); + test_error(error, "clEnqueueReadBuffer failed"); + + for (size_t i = 0; i < num_elements; i++) + { + CHECK_VERIFICATION_ERROR(pattern, output_data[i], i); + } + + return CL_SUCCESS; + } + + cl_int SetUp(int elements) override + { + cl_int error = BasicCommandBufferTest::SetUp(elements); + test_error(error, "BasicCommandBufferTest::SetUp failed"); + + if (!out_of_order_support) + { + // Test will skip as device doesn't support out-of-order + // command-buffers + return CL_SUCCESS; + } + + out_of_order_queue = clCreateCommandQueue( + context, device, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error); + test_error(error, "Unable to create command queue to test with"); + + out_of_order_command_buffer = + clCreateCommandBufferKHR(1, &out_of_order_queue, nullptr, &error); + test_error(error, "clCreateCommandBufferKHR 
failed"); + + return CL_SUCCESS; + } + + bool Skip() override + { + return !out_of_order_support || BasicCommandBufferTest::Skip(); + } + + clCommandQueueWrapper out_of_order_queue; + clCommandBufferWrapper out_of_order_command_buffer; + clEventWrapper event; +}; + +#undef CHECK_VERIFICATION_ERROR + +template +int MakeAndRunTest(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + CHECK_COMMAND_BUFFER_EXTENSION_AVAILABLE(device); + + auto test_fixture = T(device, context, queue); + cl_int error = test_fixture.SetUp(num_elements); + test_error_ret(error, "Error in test initialization", TEST_FAIL); + + if (test_fixture.Skip()) + { + return TEST_SKIPPED_ITSELF; + } + + error = test_fixture.Run(); + test_error_ret(error, "Test Failed", TEST_FAIL); + + return TEST_PASS; +} +} // anonymous namespace + +int test_single_ndrange(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_interleaved_enqueue(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_mixed_commands(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_explicit_flush(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, + num_elements); +} + +int test_user_events(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, num_elements); +} + +int test_out_of_order(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return MakeAndRunTest(device, context, queue, num_elements); +} diff --git 
a/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h new file mode 100644 index 00000000..0fd2e4ec --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/command_buffer_test_base.h @@ -0,0 +1,177 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef _CL_KHR_COMMAND_BUFFER_TEST_BASE_H +#define _CL_KHR_COMMAND_BUFFER_TEST_BASE_H + +#include +#include "harness/deviceInfo.h" +#include "harness/testHarness.h" + + +// Base class for setting function pointers to new extension entry points +struct CommandBufferTestBase +{ + CommandBufferTestBase(cl_device_id device): device(device) {} + + cl_int init_extension_functions() + { + cl_platform_id platform; + cl_int error = + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), + &platform, nullptr); + test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); + + // If it is supported get the addresses of all the APIs here. 
+#define GET_EXTENSION_ADDRESS(FUNC) \ + FUNC = reinterpret_cast( \ + clGetExtensionFunctionAddressForPlatform(platform, #FUNC)); \ + if (FUNC == nullptr) \ + { \ + log_error("ERROR: clGetExtensionFunctionAddressForPlatform failed" \ + " with " #FUNC "\n"); \ + return TEST_FAIL; \ + } + + GET_EXTENSION_ADDRESS(clCreateCommandBufferKHR); + GET_EXTENSION_ADDRESS(clReleaseCommandBufferKHR); + GET_EXTENSION_ADDRESS(clRetainCommandBufferKHR); + GET_EXTENSION_ADDRESS(clFinalizeCommandBufferKHR); + GET_EXTENSION_ADDRESS(clEnqueueCommandBufferKHR); + GET_EXTENSION_ADDRESS(clCommandBarrierWithWaitListKHR); + GET_EXTENSION_ADDRESS(clCommandCopyBufferKHR); + GET_EXTENSION_ADDRESS(clCommandCopyBufferRectKHR); + GET_EXTENSION_ADDRESS(clCommandCopyBufferToImageKHR); + GET_EXTENSION_ADDRESS(clCommandCopyImageKHR); + GET_EXTENSION_ADDRESS(clCommandCopyImageToBufferKHR); + GET_EXTENSION_ADDRESS(clCommandFillBufferKHR); + GET_EXTENSION_ADDRESS(clCommandFillImageKHR); + GET_EXTENSION_ADDRESS(clCommandNDRangeKernelKHR); + GET_EXTENSION_ADDRESS(clGetCommandBufferInfoKHR); +#undef GET_EXTENSION_ADDRESS + return CL_SUCCESS; + } + + clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr; + clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; + clRetainCommandBufferKHR_fn clRetainCommandBufferKHR = nullptr; + clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr; + clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr; + clCommandBarrierWithWaitListKHR_fn clCommandBarrierWithWaitListKHR = + nullptr; + clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; + clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr; + clCommandCopyBufferToImageKHR_fn clCommandCopyBufferToImageKHR = nullptr; + clCommandCopyImageKHR_fn clCommandCopyImageKHR = nullptr; + clCommandCopyImageToBufferKHR_fn clCommandCopyImageToBufferKHR = nullptr; + clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; + clCommandFillImageKHR_fn clCommandFillImageKHR 
= nullptr; + clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; + clGetCommandBufferInfoKHR_fn clGetCommandBufferInfoKHR = nullptr; + + cl_device_id device = nullptr; +}; + +// Wrapper class based off generic typeWrappers.h wrappers. However, because +// the release/retain functions are queried at runtime from the platform, +// rather than known at compile time we cannot link the instantiated template. +// Instead, pass an instance of `CommandBufferTestBase` on wrapper construction +// to access the release/retain functions. +class clCommandBufferWrapper { + cl_command_buffer_khr object = nullptr; + + void retain() + { + if (!object) return; + + auto err = base->clRetainCommandBufferKHR(object); + if (err != CL_SUCCESS) + { + print_error(err, "clRetainCommandBufferKHR() failed"); + std::abort(); + } + } + + void release() + { + if (!object) return; + + auto err = base->clReleaseCommandBufferKHR(object); + if (err != CL_SUCCESS) + { + print_error(err, "clReleaseCommandBufferKHR() failed"); + std::abort(); + } + } + + // Used to access release/retain functions + CommandBufferTestBase *base; + +public: + // We always want to have base available to dereference + clCommandBufferWrapper() = delete; + + clCommandBufferWrapper(CommandBufferTestBase *base): base(base) {} + + // On assignment, assume the object has a refcount of one. + clCommandBufferWrapper &operator=(cl_command_buffer_khr rhs) + { + reset(rhs); + return *this; + } + + // Copy semantics, increase retain count. + clCommandBufferWrapper(clCommandBufferWrapper const &w) { *this = w; } + clCommandBufferWrapper &operator=(clCommandBufferWrapper const &w) + { + reset(w.object); + retain(); + return *this; + } + + // Move semantics, directly take ownership. 
+ clCommandBufferWrapper(clCommandBufferWrapper &&w) { *this = std::move(w); } + clCommandBufferWrapper &operator=(clCommandBufferWrapper &&w) + { + reset(w.object); + w.object = nullptr; + return *this; + } + + ~clCommandBufferWrapper() { reset(); } + + // Release the existing object, if any, and own the new one, if any. + void reset(cl_command_buffer_khr new_object = nullptr) + { + release(); + object = new_object; + } + + operator cl_command_buffer_khr() const { return object; } +}; + +#define CHECK_COMMAND_BUFFER_EXTENSION_AVAILABLE(device) \ + { \ + if (!is_extension_available(device, "cl_khr_command_buffer")) \ + { \ + log_info( \ + "Device does not support 'cl_khr_command_buffer'. Skipping " \ + "the test.\n"); \ + return TEST_SKIPPED_ITSELF; \ + } \ + } + + +#endif // _CL_KHR_COMMAND_BUFFER_TEST_BASE_H diff --git a/test_conformance/extensions/cl_khr_command_buffer/main.cpp b/test_conformance/extensions/cl_khr_command_buffer/main.cpp new file mode 100644 index 00000000..4dece455 --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/main.cpp @@ -0,0 +1,35 @@ +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "procs.h" +#include "harness/testHarness.h" + +test_definition test_list[] = { + ADD_TEST(single_ndrange), ADD_TEST(interleaved_enqueue), + ADD_TEST(mixed_commands), ADD_TEST(explicit_flush), + ADD_TEST(user_events), ADD_TEST(out_of_order) +}; + + +int main(int argc, const char *argv[]) +{ + // A device may report the required properties of a queue that + // is compatible with command-buffers via the query + // CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR. We account + // for this in the tests themselves, rather than here, where we have a + // device to query. + const cl_command_queue_properties queue_properties = 0; + return runTestHarnessWithCheck(argc, argv, ARRAY_SIZE(test_list), test_list, + false, queue_properties, nullptr); +} diff --git a/test_conformance/extensions/cl_khr_command_buffer/procs.h b/test_conformance/extensions/cl_khr_command_buffer/procs.h new file mode 100644 index 00000000..58fd228f --- /dev/null +++ b/test_conformance/extensions/cl_khr_command_buffer/procs.h @@ -0,0 +1,35 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#ifndef _CL_KHR_COMMAND_BUFFER_PROCS_H +#define _CL_KHR_COMMAND_BUFFER_PROCS_H + +#include + +// Basic command-buffer tests +extern int test_single_ndrange(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_interleaved_enqueue(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_mixed_commands(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_explicit_flush(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_user_events(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_out_of_order(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements); + +#endif /*_CL_KHR_COMMAND_BUFFER_PROCS_H*/ -- cgit v1.2.3 From f94c1357558a78cef2af752240c3f805b4b83ce9 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 1 Sep 2022 06:43:43 +0100 Subject: Use clMemWrapper in math_brute_force (#1476) Simplify code by avoiding manual resource management. Original patch by Marco Antognini. 
Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/binary_double.cpp | 12 +++++------- test_conformance/math_brute_force/binary_float.cpp | 12 +++++------- test_conformance/math_brute_force/binary_i_double.cpp | 12 +++++------- test_conformance/math_brute_force/binary_i_float.cpp | 12 +++++------- .../math_brute_force/binary_operator_double.cpp | 12 +++++------- test_conformance/math_brute_force/binary_operator_float.cpp | 12 +++++------- test_conformance/math_brute_force/common.h | 3 +++ test_conformance/math_brute_force/macro_binary_double.cpp | 12 +++++------- test_conformance/math_brute_force/macro_binary_float.cpp | 12 +++++------- test_conformance/math_brute_force/macro_unary_double.cpp | 12 +++--------- test_conformance/math_brute_force/macro_unary_float.cpp | 12 +++--------- test_conformance/math_brute_force/unary_double.cpp | 13 ++++--------- test_conformance/math_brute_force/unary_float.cpp | 13 ++++--------- 13 files changed, 57 insertions(+), 92 deletions(-) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 490c17b6..1b1f7d4c 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -124,9 +124,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. 
@@ -793,10 +795,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 01082bc1..d229a376 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -122,9 +122,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. 
@@ -950,10 +952,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index def0bd41..7baa21a2 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -123,9 +123,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. 
@@ -715,10 +717,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index ed207098..3f998e2e 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -121,9 +121,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. 
@@ -708,10 +710,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 992df276..74883664 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -123,9 +123,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. 
@@ -760,10 +762,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index a555beaa..56f293c1 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -121,9 +121,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. @@ -887,10 +889,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index f0d18dd9..6f17898f 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -28,6 +28,9 @@ using KernelMatrix = std::array, VECTOR_SIZE_COUNT>; // Array of programs for each vector size. using Programs = std::array; +// Array of buffers for each vector size. +using Buffers = std::array; + // Information to generate OpenCL kernels. 
struct BuildKernelInfo { diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index fb28d823..a697a7be 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -122,9 +122,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + MTdata d; // Per thread command queue to improve performance @@ -705,10 +707,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index fd93e2e6..97e2f675 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -120,9 +120,11 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + MTdata d; // Per thread command queue to improve performance @@ -694,10 +696,6 @@ exit: for (auto &threadInfo : test_info.tinfo) { free_mtdata(threadInfo.d); - 
clReleaseMemObject(threadInfo.inBuf); - clReleaseMemObject(threadInfo.inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); } return error; diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 2365a195..5a3ad355 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -116,8 +116,9 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + Buffers outBuf; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -475,12 +476,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - clReleaseMemObject(threadInfo.inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); - } - return error; } diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index adc6c3ec..d2982156 100644 --- a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -115,8 +115,9 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + Buffers outBuf; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -489,12 +490,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - 
clReleaseMemObject(threadInfo.inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); - } - return error; } diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 19402283..7dfc12b1 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -116,8 +116,10 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. @@ -513,12 +515,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - clReleaseMemObject(threadInfo.inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); - } - return error; } diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 5a9a7361..6a5c3539 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -114,8 +114,10 @@ cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) // Thread specific data for a worker thread struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value. Init to 0. 
@@ -690,12 +692,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - clReleaseMemObject(threadInfo.inBuf); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(threadInfo.outBuf[j]); - } - return error; } -- cgit v1.2.3 From 2dc253313047c3ab7f0cf77ae8f8cdf1727ff3b2 Mon Sep 17 00:00:00 2001 From: Romaric Jodin <89833130+rjodinchr@users.noreply.github.com> Date: Thu, 1 Sep 2022 07:56:10 +0200 Subject: fix test kernel attributes when api fcts are failing (#1449) test_error returns the err given as the first argument. As the run_test function returns a bool, we end up returning true (meaning pass) when an api function fails. Instead return explicitly false (meaning fail). --- test_conformance/api/test_kernel_attributes.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/api/test_kernel_attributes.cpp b/test_conformance/api/test_kernel_attributes.cpp index 2e4e0a7f..ad4baa0f 100644 --- a/test_conformance/api/test_kernel_attributes.cpp +++ b/test_conformance/api/test_kernel_attributes.cpp @@ -275,16 +275,16 @@ static bool run_test(cl_context context, cl_device_id deviceID, clKernelWrapper kernel; cl_int err = create_single_kernel_helper(context, &program, &kernel, 1, &kernel_src, "test_kernel"); - test_error(err, "create_single_kernel_helper"); + test_error_ret(err, "create_single_kernel_helper", false); // Get the size of the kernel attribute string returned size_t size = 0; err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, 0, nullptr, &size); - test_error(err, "clGetKernelInfo"); + test_error_ret(err, "clGetKernelInfo", false); std::vector attributes(size); err = clGetKernelInfo(kernel, CL_KERNEL_ATTRIBUTES, attributes.size(), attributes.data(), nullptr); - test_error(err, "clGetKernelInfo"); + test_error_ret(err, "clGetKernelInfo", false); std::string attribute_string(attributes.data()); attribute_string.erase( std::remove(attribute_string.begin(), attribute_string.end(), ' '), -- 
cgit v1.2.3 From 9ad4899862f95091c95754ed26981c57cb5a52e7 Mon Sep 17 00:00:00 2001 From: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Date: Thu, 1 Sep 2022 11:28:13 +0530 Subject: Use size_t instead of cl_int (#1414) * Use size_t instead of cl_int Memory is allocated for cl_int, but mapped as size_t. Use size_t instead of cl_int during allocation and mapping for consistency. * Use size_t instead of cl_int Memory is allocated for cl_int, but mapped as size_t. Use size_t instead of cl_int during allocation and mapping for consistency. * Use size_t instead of cl_int Memory is allocated for cl_int, but mapped as size_t. Use size_t instead of cl_int during allocation and mapping for consistency. * Remove test_half changes. Remove test_half changes from other fix that got included in this commit. * Final formatting fix. --- test_conformance/SVM/test_shared_address_space_coarse_grain.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp b/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp index f26981bc..12358167 100644 --- a/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp +++ b/test_conformance/SVM/test_shared_address_space_coarse_grain.cpp @@ -98,7 +98,9 @@ cl_int create_linked_lists_on_device(int ci, cl_command_queue cmdq, cl_mem alloc cl_int error = CL_SUCCESS; log_info("SVM: creating linked list on device: %d ", ci); - size_t *pAllocator = (size_t*) clEnqueueMapBuffer(cmdq, allocator, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(cl_int), 0, NULL,NULL, &error); + size_t *pAllocator = (size_t *)clEnqueueMapBuffer( + cmdq, allocator, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, sizeof(size_t), + 0, NULL, NULL, &error); test_error2(error, pAllocator, "clEnqueueMapBuffer failed"); // reset allocator index *pAllocator = numLists; // the first numLists elements of the nodes array are already allocated (they hold the head of each list). 
@@ -206,7 +208,9 @@ int shared_address_space_coarse_grain(cl_device_id deviceID, cl_context context2 } // this buffer holds an index into the nodes buffer, it is used for node allocation - clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(size_t), NULL, &error); + test_error(error, "clCreateBuffer failed."); error = clGetMemObjectInfo(allocator, CL_MEM_USES_SVM_POINTER, sizeof(cl_bool), &usesSVMpointer, 0); -- cgit v1.2.3 From 7caa4c4421a966e8b4db23aff1ee12cf5c2b7aa6 Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Thu, 1 Sep 2022 23:13:01 +0100 Subject: Update known extensions in compiler define test (#1480) Add [cl_khr_command_buffer_mutable_dispatch](https://github.com/KhronosGroup/OpenCL-Docs/pull/819), [cl_khr_subgroup_rotate](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_subgroup_rotate), and [cl_khr_extended_async_copies](https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_Ext.html#cl_khr_extended_async_copies) to the list of known extensions used in `test_compiler_defines_for_extensions` --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 4e5b2841..91441416 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -20,7 +20,7 @@ #include #endif - +// List should follow order in the extension spec const char *known_extensions[] = { "cl_khr_byte_addressable_store", "cl_khr_3d_image_writes", @@ -42,6 +42,7 @@ const char *known_extensions[] = { "cl_khr_mipmap_image_writes", "cl_khr_srgb_image_writes", "cl_khr_subgroup_named_barrier", + 
"cl_khr_extended_async_copies", "cl_khr_subgroup_extended_types", "cl_khr_subgroup_non_uniform_vote", "cl_khr_subgroup_ballot", @@ -51,6 +52,7 @@ const char *known_extensions[] = { "cl_khr_subgroup_clustered_reduce", "cl_khr_extended_bit_ops", "cl_khr_integer_dot_product", + "cl_khr_subgroup_rotate", // API-only extensions after this point. If you add above here, modify // first_API_extension below. "cl_khr_icd", @@ -82,10 +84,11 @@ const char *known_extensions[] = { "cl_khr_command_buffer", "cl_khr_external_memory", "cl_khr_external_memory_opaque_fd", + "cl_khr_command_buffer_mutable_dispatch", }; size_t num_known_extensions = ARRAY_SIZE(known_extensions); -size_t first_API_extension = 29; +size_t first_API_extension = 31; const char *known_embedded_extensions[] = { "cles_khr_int64", -- cgit v1.2.3 From 388944c01cbfc4272d11b3a9d520e2eed2d1288d Mon Sep 17 00:00:00 2001 From: Ahmed <36049290+AhmedAmraniAkdi@users.noreply.github.com> Date: Tue, 6 Sep 2022 17:53:12 +0100 Subject: Minimum 2 non atomic variables per thread for the c11 atomic fence test for embedded profile devices. (#1452) * Minimum 2 Non atomic variables per thread for an embedded profile device - https://github.com/KhronosGroup/OpenCL-CTS/issues/1274 * Formatting --- test_conformance/c11_atomics/common.h | 5 +- test_conformance/c11_atomics/test_atomics.cpp | 4907 +++++++++++++++---------- 2 files changed, 2960 insertions(+), 1952 deletions(-) diff --git a/test_conformance/c11_atomics/common.h b/test_conformance/c11_atomics/common.h index 5bb9e5b7..6c7d0b12 100644 --- a/test_conformance/c11_atomics/common.h +++ b/test_conformance/c11_atomics/common.h @@ -1361,9 +1361,8 @@ int CBasicTest::ExecuteSingleTest( error = clSetKernelArg(kernel, argInd++, LocalRefValues() ? 
typeSize - * ((CurrentGroupSize() - * NumNonAtomicVariablesPerThread()) - + 4) + * (CurrentGroupSize() + * NumNonAtomicVariablesPerThread()) : 1, NULL); test_error(error, "Unable to set indexed kernel argument"); diff --git a/test_conformance/c11_atomics/test_atomics.cpp b/test_conformance/c11_atomics/test_atomics.cpp index 38b4e9a7..09c14ed1 100644 --- a/test_conformance/c11_atomics/test_atomics.cpp +++ b/test_conformance/c11_atomics/test_atomics.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -23,2200 +23,3209 @@ #include #include -template -class CBasicTestStore : public CBasicTestMemOrderScope -{ +template +class CBasicTestStore + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTest::CheckCapabilities; - CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(MemoryOrder() == MEMORY_ORDER_ACQUIRE || - MemoryOrder() == MEMORY_ORDER_ACQ_REL) - return 0; //skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return - " atomic_store"+postfix+"(&destMemory[tid], tid"+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = (HostDataType)whichDestValue; - return true; - } + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTest::CheckCapabilities; + CBasicTestStore(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE + || MemoryOrder() == MEMORY_ORDER_ACQ_REL) + return 0; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return " atomic_store" + postfix + "(&destMemory[tid], tid" + + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + host_atomic_store(&destMemory[tid], (HostDataType)tid, MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; + return true; + } }; -int test_atomic_store_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_store_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestStore test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestStore test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, 
num_elements)); - CBasicTestStore test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestStore test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestStore test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestStore test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_float(TYPE_ATOMIC_FLOAT, + useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestStore test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + 
test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestStore test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestStore + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_store(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_store_generic(deviceID, context, queue, num_elements, false); + return test_atomic_store_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_store(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_store(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_store_generic(deviceID, context, queue, num_elements, true); + return test_atomic_store_generic(deviceID, context, queue, num_elements, + true); } -template -class 
CBasicTestInit : public CBasicTest -{ +template +class CBasicTestInit : public CBasicTest { public: - using CBasicTest::OldValueCheck; - CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) : CBasicTest(dataType, useSVM) - { - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual std::string ProgramCore() - { - return - " atomic_init(&destMemory[tid], tid);\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - host_atomic_init(&destMemory[tid], (HostDataType)tid); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = (HostDataType)whichDestValue; - return true; - } + using CBasicTest::OldValueCheck; + CBasicTestInit(TExplicitAtomicType dataType, bool useSVM) + : CBasicTest(dataType, useSVM) + { + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + virtual std::string ProgramCore() + { + return " atomic_init(&destMemory[tid], tid);\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + host_atomic_init(&destMemory[tid], (HostDataType)tid); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; + return true; + } }; -int test_atomic_init_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_init_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestInit test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, 
num_elements)); - CBasicTestInit test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestInit test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestInit test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestInit test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int 
error = 0; + CBasicTestInit test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_float(TYPE_ATOMIC_FLOAT, + useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestInit test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestInit test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestInit test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, 
queue, num_elements)); + CBasicTestInit + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_init(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_init_generic(deviceID, context, queue, num_elements, false); + return test_atomic_init_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_init(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_init(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_init_generic(deviceID, context, queue, num_elements, true); + return test_atomic_init_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestLoad : public CBasicTestMemOrderScope -{ +template +class CBasicTestLoad + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTest::CheckCapabilities; - CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(MemoryOrder() == MEMORY_ORDER_RELEASE || - MemoryOrder() == MEMORY_ORDER_ACQ_REL) - return 0; //skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable 
- - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramCore() - { - // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store - // should be MEMORY_ORDER_RELEASE - std::string memoryOrderScopeLoad = MemoryOrderScopeStr(); - std::string memoryOrderScopeStore = - (MemoryOrder() == MEMORY_ORDER_ACQUIRE) - ? (", memory_order_release" + MemoryScopeStr()) - : memoryOrderScopeLoad; - std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit"); - return " atomic_store" + postfix + "(&destMemory[tid], tid" - + memoryOrderScopeStore - + ");\n" - " oldValues[tid] = atomic_load" - + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - host_atomic_store(&destMemory[tid], (HostDataType)tid, MEMORY_ORDER_SEQ_CST); - oldValues[tid] = host_atomic_load(&destMemory[tid], MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = (HostDataType)whichDestValue; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - for(cl_uint i = 0; i < threadCount; i++ ) - { - if(refValues[i] != (HostDataType)i) - { - log_error("Invalid value for thread %u\n", (cl_uint)i); - correct = false; + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTest::CheckCapabilities; + CBasicTestLoad(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) 
+ { + return threadCount; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (MemoryOrder() == MEMORY_ORDER_RELEASE + || MemoryOrder() == MEMORY_ORDER_ACQ_REL) + return 0; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + // In the case this test is run with MEMORY_ORDER_ACQUIRE, the store + // should be MEMORY_ORDER_RELEASE + std::string memoryOrderScopeLoad = MemoryOrderScopeStr(); + std::string memoryOrderScopeStore = + (MemoryOrder() == MEMORY_ORDER_ACQUIRE) + ? (", memory_order_release" + MemoryScopeStr()) + : memoryOrderScopeLoad; + std::string postfix(memoryOrderScopeLoad.empty() ? "" : "_explicit"); + return " atomic_store" + postfix + "(&destMemory[tid], tid" + + memoryOrderScopeStore + + ");\n" + " oldValues[tid] = atomic_load" + + postfix + "(&destMemory[tid]" + memoryOrderScopeLoad + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + host_atomic_store(&destMemory[tid], (HostDataType)tid, + MEMORY_ORDER_SEQ_CST); + oldValues[tid] = host_atomic_load( + &destMemory[tid], MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = (HostDataType)whichDestValue; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + for (cl_uint i = 0; i < threadCount; i++) + { + if (refValues[i] != (HostDataType)i) + { + log_error("Invalid value for thread %u\n", (cl_uint)i); + correct = false; + return true; + } + } return true; - } } - return 
true; - } }; -int test_atomic_load_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_load_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestLoad test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestLoad test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestLoad test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, 
useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestLoad test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestLoad test_int(TYPE_ATOMIC_INT, useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_float(TYPE_ATOMIC_FLOAT, + useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestLoad test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestLoad 
test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestLoad + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_load(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_load_generic(deviceID, context, queue, num_elements, false); + return test_atomic_load_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_load(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_load(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_load_generic(deviceID, context, queue, num_elements, true); + return test_atomic_load_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestExchange : public CBasicTestMemOrderScope -{ +template +class CBasicTestExchange + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::Iterations; - using CBasicTestMemOrderScope::IterationsStr; - CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(123456); - } - virtual 
std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], tid"+memoryOrderScope+");\n" - " for(int i = 0; i < "+IterationsStr()+"; i++)\n" - " oldValues[tid] = atomic_exchange"+postfix+"(&destMemory[0], oldValues[tid]"+memoryOrderScope+");\n"; - } - - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, MemoryOrder()); - for(int i = 0; i < Iterations(); i++) - oldValues[tid] = host_atomic_exchange(&destMemory[0], oldValues[tid], MemoryOrder()); - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - OldValueCheck(Iterations()%2 == 0); //check is valid for even number of iterations only - correct = true; - /* We are expecting values from 0 to size-1 and initial value from atomic variable */ - /* These values must be distributed across refValues array and atomic variable finalVaue[0] */ - /* Any repeated value is treated as an error */ - std::vector tidFound(threadCount); - bool startValueFound = false; - cl_uint i; - - for(i = 0; i <= threadCount; i++) - { - cl_uint value; - if(i == threadCount) - value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written) - else - value = (cl_uint)refValues[i]; - if(value == (cl_uint)StartValue()) - { - // Special initial value - if(startValueFound) + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::Iterations; + using CBasicTestMemOrderScope::IterationsStr; + CBasicTestExchange(TExplicitAtomicType dataType, bool useSVM) + : 
CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(123456); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " oldValues[tid] = atomic_exchange" + postfix + + "(&destMemory[0], tid" + memoryOrderScope + + ");\n" + " for(int i = 0; i < " + + IterationsStr() + + "; i++)\n" + " oldValues[tid] = atomic_exchange" + + postfix + "(&destMemory[0], oldValues[tid]" + memoryOrderScope + + ");\n"; + } + + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_exchange(&destMemory[0], (HostDataType)tid, + MemoryOrder()); + for (int i = 0; i < Iterations(); i++) + oldValues[tid] = host_atomic_exchange( + &destMemory[0], oldValues[tid], MemoryOrder()); + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + OldValueCheck( + Iterations() % 2 + == 0); // check is valid for even number of iterations only + correct = true; + /* We are expecting values from 0 to size-1 and initial value from + * atomic variable */ + /* These values must be distributed across refValues array and atomic + * variable finalVaue[0] */ + /* Any repeated value is treated as an error */ + std::vector tidFound(threadCount); + bool startValueFound = false; + cl_uint i; + + for (i = 0; i <= threadCount; i++) { - log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue()); - correct = false; - return true; + cl_uint value; + if (i == threadCount) + value = (cl_uint)finalValues[0]; // additional value from atomic + // variable (last written) + else + value = (cl_uint)refValues[i]; + if (value == (cl_uint)StartValue()) + { + // Special initial value + if (startValueFound) + { + log_error("ERROR: Starting reference value (%u) occurred " + "more thane once\n", + 
(cl_uint)StartValue()); + correct = false; + return true; + } + startValueFound = true; + continue; + } + if (value >= threadCount) + { + log_error( + "ERROR: Reference value %u outside of valid range! (%u)\n", + i, value); + correct = false; + return true; + } + if (tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", + value); + correct = false; + return true; + } + tidFound[value] = true; } - startValueFound = true; - continue; - } - if(value >= threadCount) - { - log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value); - correct = false; return true; - } - if(tidFound[value]) - { - log_error("ERROR: Value (%u) occurred more thane once\n", value); - correct = false; - return true; - } - tidFound[value] = true; } - return true; - } }; -int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_exchange_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestExchange test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_float(TYPE_ATOMIC_FLOAT, useSVM); - EXECUTE_TEST(error, test_float.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_double(TYPE_ATOMIC_DOUBLE, useSVM); - EXECUTE_TEST(error, test_double.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { 
- CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestExchange test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestExchange test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestExchange test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_float( + TYPE_ATOMIC_FLOAT, useSVM); + EXECUTE_TEST(error, + test_float.Execute(deviceID, context, queue, num_elements)); + 
CBasicTestExchange test_double( + TYPE_ATOMIC_DOUBLE, useSVM); + EXECUTE_TEST(error, + test_double.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestExchange + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestExchange + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestExchange + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_exchange(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_exchange(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_exchange_generic(deviceID, context, queue, num_elements, false); + return test_atomic_exchange_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements) +int test_svm_atomic_exchange(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_exchange_generic(deviceID, context, queue, num_elements, true); + return test_atomic_exchange_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestCompareStrong : public CBasicTestMemOrder2Scope -{ +template +class CBasicTestCompareStrong + : public CBasicTestMemOrder2Scope { public: - using CBasicTestMemOrder2Scope::StartValue; - using CBasicTestMemOrder2Scope::OldValueCheck; - using CBasicTestMemOrder2Scope::MemoryOrder; - using CBasicTestMemOrder2Scope::MemoryOrder2; - using CBasicTestMemOrder2Scope::MemoryOrderScope; - using CBasicTestMemOrder2Scope::MemoryScope; - using CBasicTestMemOrder2Scope::DataType; - using CBasicTestMemOrder2Scope::Iterations; - using CBasicTestMemOrder2Scope::IterationsStr; - using CBasicTest::CheckCapabilities; - CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrder2Scope(dataType, useSVM) - { - StartValue(123456); - OldValueCheck(false); - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(MemoryOrder2() == MEMORY_ORDER_RELEASE || - MemoryOrder2() == MEMORY_ORDER_ACQ_REL) - return 0; // not allowed as 'failure' argument - if((MemoryOrder() == MEMORY_ORDER_RELAXED && MemoryOrder2() != MEMORY_ORDER_RELAXED) || - (MemoryOrder() != MEMORY_ORDER_SEQ_CST && MemoryOrder2() == MEMORY_ORDER_SEQ_CST)) - return 0; // failure argument shall be no stronger than the success - - if (CheckCapabilities(MemoryScope(), MemoryOrder()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - if (CheckCapabilities(MemoryScope(), MemoryOrder2()) == TEST_SKIPPED_ITSELF) - return 0; // skip test - not applicable - - return CBasicTestMemOrder2Scope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramCore() - { - 
std::string memoryOrderScope = MemoryOrderScope(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" ")+DataType().RegularTypeName()+" expected, previous;\n" - " int successCount = 0;\n" - " oldValues[tid] = tid;\n" - " expected = tid; // force failure at the beginning\n" - " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n" - " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n" - " else\n" - " {\n" - " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n" - " {\n" - " previous = expected;\n" - " if(atomic_compare_exchange_strong"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n" - " {\n" - " oldValues[tid] = expected;\n" - " successCount++;\n" - " }\n" - " else\n" - " {\n" - " if(previous == expected) // spurious failure - shouldn't occur for 'strong'\n" - " {\n" - " oldValues[tid] = threadCount; //mark fail with invalid value\n" - " break;\n" - " }\n" - " }\n" - " }\n" - " }\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - HostDataType expected = (HostDataType)StartValue(), previous; - oldValues[tid] = (HostDataType)tid; - for(int i = 0; i < Iterations(); i++) - { - previous = expected; - if(host_atomic_compare_exchange(&destMemory[0], &expected, oldValues[tid], MemoryOrder(), MemoryOrder2())) - oldValues[tid] = expected; - else - { - if(previous == expected) // shouldn't occur for 'strong' + using CBasicTestMemOrder2Scope::StartValue; + using CBasicTestMemOrder2Scope::OldValueCheck; + using CBasicTestMemOrder2Scope::MemoryOrder; + using CBasicTestMemOrder2Scope::MemoryOrder2; + using CBasicTestMemOrder2Scope::MemoryOrderScope; + using CBasicTestMemOrder2Scope::MemoryScope; + using CBasicTestMemOrder2Scope::DataType; + using CBasicTestMemOrder2Scope::Iterations; + using 
CBasicTestMemOrder2Scope::IterationsStr; + using CBasicTest::CheckCapabilities; + CBasicTestCompareStrong(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrder2Scope(dataType, + useSVM) + { + StartValue(123456); + OldValueCheck(false); + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (MemoryOrder2() == MEMORY_ORDER_RELEASE + || MemoryOrder2() == MEMORY_ORDER_ACQ_REL) + return 0; // not allowed as 'failure' argument + if ((MemoryOrder() == MEMORY_ORDER_RELAXED + && MemoryOrder2() != MEMORY_ORDER_RELAXED) + || (MemoryOrder() != MEMORY_ORDER_SEQ_CST + && MemoryOrder2() == MEMORY_ORDER_SEQ_CST)) + return 0; // failure argument shall be no stronger than the success + + if (CheckCapabilities(MemoryScope(), MemoryOrder()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + if (CheckCapabilities(MemoryScope(), MemoryOrder2()) + == TEST_SKIPPED_ITSELF) + return 0; // skip test - not applicable + + return CBasicTestMemOrder2Scope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScope(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return std::string(" ") + DataType().RegularTypeName() + + " expected, previous;\n" + " int successCount = 0;\n" + " oldValues[tid] = tid;\n" + " expected = tid; // force failure at the beginning\n" + " if(atomic_compare_exchange_strong" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + ") || expected == tid)\n" + " oldValues[tid] = threadCount+1; //mark unexpected success " + "with invalid value\n" + " else\n" + " {\n" + " for(int i = 0; i < " + + IterationsStr() + + " || successCount == 0; i++)\n" + " {\n" + " previous = expected;\n" + " if(atomic_compare_exchange_strong" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + "))\n" + " {\n" + " oldValues[tid] = expected;\n" + " successCount++;\n" + " }\n" + " else\n" + " {\n" + " if(previous == expected) // spurious failure - " + "shouldn't occur for 'strong'\n" + " {\n" + " oldValues[tid] = threadCount; //mark fail with " + "invalid value\n" + " break;\n" + " }\n" + " }\n" + " }\n" + " }\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + HostDataType expected = (HostDataType)StartValue(), previous; + oldValues[tid] = (HostDataType)tid; + for (int i = 0; i < Iterations(); i++) { - oldValues[tid] = threadCount; //mark fail with invalid value + previous = expected; + if (host_atomic_compare_exchange(&destMemory[0], &expected, + oldValues[tid], MemoryOrder(), + MemoryOrder2())) + oldValues[tid] = expected; + else + { + if (previous == expected) // shouldn't occur for 'strong' + { + oldValues[tid] = threadCount; // mark fail with invalid + // value + } + } } - } - } - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - /* We are expecting values from 0 to size-1 and initial value from atomic variable */ - /* These values must be distributed across 
refValues array and atomic variable finalVaue[0] */ - /* Any repeated value is treated as an error */ - std::vector tidFound(threadCount); - bool startValueFound = false; - cl_uint i; - - for(i = 0; i <= threadCount; i++) - { - cl_uint value; - if(i == threadCount) - value = (cl_uint)finalValues[0]; //additional value from atomic variable (last written) - else - value = (cl_uint)refValues[i]; - if(value == (cl_uint)StartValue()) - { - // Special initial value - if(startValueFound) + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + /* We are expecting values from 0 to size-1 and initial value from + * atomic variable */ + /* These values must be distributed across refValues array and atomic + * variable finalVaue[0] */ + /* Any repeated value is treated as an error */ + std::vector tidFound(threadCount); + bool startValueFound = false; + cl_uint i; + + for (i = 0; i <= threadCount; i++) { - log_error("ERROR: Starting reference value (%u) occurred more thane once\n", (cl_uint)StartValue()); - correct = false; - return true; + cl_uint value; + if (i == threadCount) + value = (cl_uint)finalValues[0]; // additional value from atomic + // variable (last written) + else + value = (cl_uint)refValues[i]; + if (value == (cl_uint)StartValue()) + { + // Special initial value + if (startValueFound) + { + log_error("ERROR: Starting reference value (%u) occurred " + "more thane once\n", + (cl_uint)StartValue()); + correct = false; + return true; + } + startValueFound = true; + continue; + } + if (value >= threadCount) + { + if (value == threadCount) + log_error("ERROR: Spurious failure detected for " + "atomic_compare_exchange_strong\n"); + log_error( + "ERROR: Reference value %u outside of valid range! 
(%u)\n", + i, value); + correct = false; + return true; + } + if (tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", + value); + correct = false; + return true; + } + tidFound[value] = true; } - startValueFound = true; - continue; - } - if(value >= threadCount) - { - if(value == threadCount) - log_error("ERROR: Spurious failure detected for atomic_compare_exchange_strong\n"); - log_error("ERROR: Reference value %u outside of valid range! (%u)\n", i, value); - correct = false; - return true; - } - if(tidFound[value]) - { - log_error("ERROR: Value (%u) occurred more thane once\n", value); - correct = false; return true; - } - tidFound[value] = true; } - return true; - } }; -int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_compare_exchange_strong_generic(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements, bool useSVM) { - int error = 0; - CBasicTestCompareStrong test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - 
CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestCompareStrong test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareStrong test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestCompareStrong test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_uint( + TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_long( + TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestCompareStrong + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + 
CBasicTestCompareStrong + test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestCompareStrong + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareStrong + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_compare_exchange_strong(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, false); + return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, + num_elements, false); } -int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_compare_exchange_strong(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, num_elements, true); + return test_atomic_compare_exchange_strong_generic(deviceID, context, queue, + num_elements, true); } -template -class CBasicTestCompareWeak : public 
CBasicTestCompareStrong -{ +template +class CBasicTestCompareWeak + : public CBasicTestCompareStrong { public: - using CBasicTestCompareStrong::StartValue; - using CBasicTestCompareStrong::MemoryOrderScope; - using CBasicTestCompareStrong::DataType; - using CBasicTestCompareStrong::Iterations; - using CBasicTestCompareStrong::IterationsStr; - CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) : CBasicTestCompareStrong(dataType, useSVM) - { - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScope(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" ")+DataType().RegularTypeName()+" expected , previous;\n" - " int successCount = 0;\n" - " oldValues[tid] = tid;\n" - " expected = tid; // force failure at the beginning\n" - " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+") || expected == tid)\n" - " oldValues[tid] = threadCount+1; //mark unexpected success with invalid value\n" - " else\n" - " {\n" - " for(int i = 0; i < "+IterationsStr()+" || successCount == 0; i++)\n" - " {\n" - " previous = expected;\n" - " if(atomic_compare_exchange_weak"+postfix+"(&destMemory[0], &expected, oldValues[tid]"+memoryOrderScope+"))\n" - " {\n" - " oldValues[tid] = expected;\n" - " successCount++;\n" - " }\n" - " }\n" - " }\n"; - } + using CBasicTestCompareStrong::StartValue; + using CBasicTestCompareStrong::MemoryOrderScope; + using CBasicTestCompareStrong::DataType; + using CBasicTestCompareStrong::Iterations; + using CBasicTestCompareStrong::IterationsStr; + CBasicTestCompareWeak(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestCompareStrong(dataType, + useSVM) + {} + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScope(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return std::string(" ") + DataType().RegularTypeName() + + " expected , previous;\n" + " int successCount = 0;\n" + " oldValues[tid] = tid;\n" + " expected = tid; // force failure at the beginning\n" + " if(atomic_compare_exchange_weak" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + ") || expected == tid)\n" + " oldValues[tid] = threadCount+1; //mark unexpected success " + "with invalid value\n" + " else\n" + " {\n" + " for(int i = 0; i < " + + IterationsStr() + + " || successCount == 0; i++)\n" + " {\n" + " previous = expected;\n" + " if(atomic_compare_exchange_weak" + + postfix + "(&destMemory[0], &expected, oldValues[tid]" + + memoryOrderScope + + "))\n" + " {\n" + " oldValues[tid] = expected;\n" + " successCount++;\n" + " }\n" + " }\n" + " }\n"; + } }; -int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_compare_exchange_weak_generic(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements, bool useSVM) { - int error = 0; - CBasicTestCompareWeak test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, 
useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestCompareWeak test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestCompareWeak test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestCompareWeak test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_uint( + TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_long( + TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestCompareWeak + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + 
test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestCompareWeak + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestCompareWeak + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, false); + return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, + num_elements, false); } -int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_compare_exchange_weak(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, num_elements, true); + return test_atomic_compare_exchange_weak_generic(deviceID, context, queue, + num_elements, true); } -template 
-class CBasicTestFetchAdd : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchAdd + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n"+ - " atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n" - " atomic_fetch_add"+postfix+"(&destMemory[0], ("+DataType().AddSubOperandTypeName()+")tid + 3"+memoryOrderScope+");\n" - " atomic_fetch_add"+postfix+"(&destMemory[0], (("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8"+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, MemoryOrder()); - host_atomic_fetch_add(&destMemory[0], ((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8, MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - expected += ((HostDataType)i+3)*3+(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8); - return true; - } + 
using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + CBasicTestFetchAdd(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + {} + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " oldValues[tid] = atomic_fetch_add" + postfix + + "(&destMemory[0], (" + DataType().AddSubOperandTypeName() + + ")tid + 3" + memoryOrderScope + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], (" + + DataType().AddSubOperandTypeName() + ")tid + 3" + memoryOrderScope + + ");\n" + " atomic_fetch_add" + + postfix + "(&destMemory[0], ((" + + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + + DataType().AddSubOperandTypeName() + ")-1)*8" + memoryOrderScope + + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_add( + &destMemory[0], (HostDataType)tid + 3, MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, + MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], (HostDataType)tid + 3, + MemoryOrder()); + host_atomic_fetch_add(&destMemory[0], + ((HostDataType)tid + 3) + << (sizeof(HostDataType) - 1) * 8, + MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + expected += ((HostDataType)i + 3) * 3 + + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + return true; + } }; -int 
test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_add_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchAdd test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchAdd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAdd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - 
CBasicTestFetchAdd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchAdd test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchAdd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchAdd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAdd + 
test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_add_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFetchSub : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchSub + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_sub"+postfix+"(&destMemory[0], tid + 3 +((("+DataType().AddSubOperandTypeName()+")tid + 3) << (sizeof("+DataType().AddSubOperandTypeName()+")-1)*8)"+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_sub(&destMemory[0], (HostDataType)tid + 3+(((HostDataType)tid + 3) << (sizeof(HostDataType)-1)*8), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - expected -= (HostDataType)i + 3 +(((HostDataType)i + 3) << (sizeof(HostDataType)-1)*8); - return true; - } + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + CBasicTestFetchSub(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + {} + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return " oldValues[tid] = atomic_fetch_sub" + postfix + + "(&destMemory[0], tid + 3 +(((" + + DataType().AddSubOperandTypeName() + ")tid + 3) << (sizeof(" + + DataType().AddSubOperandTypeName() + ")-1)*8)" + memoryOrderScope + + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_sub( + &destMemory[0], + (HostDataType)tid + 3 + + (((HostDataType)tid + 3) << (sizeof(HostDataType) - 1) * 8), + MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + expected -= (HostDataType)i + 3 + + (((HostDataType)i + 3) << (sizeof(HostDataType) - 1) * 8); + return true; + } }; -int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_sub_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchSub test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub 
test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchSub test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchSub test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchSub test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchSub + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + 
test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchSub + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchSub + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_sub_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFetchOr : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchOr + : public 
CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - cl_uint numBits = DataType().Size(deviceID) * 8; - - return (threadCount + numBits - 1) / numBits; - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n" - " int whichResult = tid / numBits;\n" - " int bitIndex = tid - (whichResult * numBits);\n" - "\n" - " oldValues[tid] = atomic_fetch_or"+postfix+"(&destMemory[whichResult], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - size_t numBits = sizeof(HostDataType) * 8; - size_t whichResult = tid / numBits; - size_t bitIndex = tid - (whichResult * numBits); - - oldValues[tid] = host_atomic_fetch_or(&destMemory[whichResult], ((HostDataType)1 << bitIndex), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8); - if(whichDestValue < numValues - 1) - { - expected = ~(HostDataType)0; - return true; - } - // Last item doesn't get or'ed on every bit, so we have to mask away - cl_uint numBits = threadCount - whichDestValue * (sizeof(HostDataType)*8); - expected = StartValue(); - for(cl_uint i = 0; i < numBits; i++) - expected |= ((HostDataType)1 << i); - 
return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchOr(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + cl_uint numBits = DataType().Size(deviceID) * 8; + + return (threadCount + numBits - 1) / numBits; + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" size_t numBits = sizeof(") + + DataType().RegularTypeName() + + ") * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - (whichResult * numBits);\n" + "\n" + " oldValues[tid] = atomic_fetch_or" + + postfix + "(&destMemory[whichResult], ((" + + DataType().RegularTypeName() + ")1 << bitIndex) " + + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + size_t numBits = sizeof(HostDataType) * 8; + size_t whichResult = tid / numBits; + size_t bitIndex = tid - (whichResult * numBits); + + oldValues[tid] = + host_atomic_fetch_or(&destMemory[whichResult], + ((HostDataType)1 << bitIndex), MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) + / (sizeof(HostDataType) * 8); + if (whichDestValue < numValues - 1) + { + expected = ~(HostDataType)0; + return true; + } + // Last item doesn't get or'ed on every bit, so we have to mask away + cl_uint numBits = + threadCount - whichDestValue * (sizeof(HostDataType) * 8); + expected = StartValue(); + for (cl_uint i = 0; i < numBits; i++) + 
expected |= ((HostDataType)1 << i); + return true; + } }; -int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_or_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchOr test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchOr test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, 
test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOr test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchOr test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchOr + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchOr + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOr test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, 
num_elements)); + CBasicTestFetchOr + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_or_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFetchXor : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchXor + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::DataType; - CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue((HostDataType)0x2f08ab418ba0541LL); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return - std::string(" int numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n" - " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n" - "\n" - " oldValues[tid] = atomic_fetch_xor"+postfix+"(&destMemory[0], (("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - int numBits = sizeof(HostDataType) * 8; - int bitIndex = (numBits-1)*(tid+1)/threadCount; - - oldValues[tid] = host_atomic_fetch_xor(&destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - int numBits = sizeof(HostDataType)*8; - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - { - int bitIndex = (numBits-1)*(i+1)/threadCount; - expected ^= ((HostDataType)1 << bitIndex); - } - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::DataType; + CBasicTestFetchXor(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue((HostDataType)0x2f08ab418ba0541LL); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return std::string(" int numBits = sizeof(") + + DataType().RegularTypeName() + + ") * 8;\n" + " int bitIndex = (numBits-1)*(tid+1)/threadCount;\n" + "\n" + " oldValues[tid] = atomic_fetch_xor" + + postfix + "(&destMemory[0], ((" + DataType().RegularTypeName() + + ")1 << bitIndex) " + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + int numBits = sizeof(HostDataType) * 8; + int bitIndex = (numBits - 1) * (tid + 1) / threadCount; + + oldValues[tid] = host_atomic_fetch_xor( + &destMemory[0], ((HostDataType)1 << bitIndex), MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + int numBits = sizeof(HostDataType) * 8; + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + { + int bitIndex = (numBits - 1) * (i + 1) / threadCount; + expected ^= ((HostDataType)1 << bitIndex); + } + return true; + } }; -int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_xor_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchXor test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchXor 
test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchXor test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchXor test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchXor + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + 
test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchXor + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_xor_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_xor_generic(deviceID, context, 
queue, num_elements, + true); } -template -class CBasicTestFetchAnd : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchAnd + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(~(HostDataType)0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - cl_uint numBits = DataType().Size(deviceID) * 8; - - return (threadCount + numBits - 1) / numBits; - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" size_t numBits = sizeof(")+DataType().RegularTypeName()+") * 8;\n" - " int whichResult = tid / numBits;\n" - " int bitIndex = tid - (whichResult * numBits);\n" - "\n" - " oldValues[tid] = atomic_fetch_and"+postfix+"(&destMemory[whichResult], ~(("+DataType().RegularTypeName()+")1 << bitIndex) "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - size_t numBits = sizeof(HostDataType) * 8; - size_t whichResult = tid / numBits; - size_t bitIndex = tid - (whichResult * numBits); - - oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], ~((HostDataType)1 << bitIndex), MemoryOrder()); - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - cl_uint numValues = (threadCount + (sizeof(HostDataType)*8-1)) / (sizeof(HostDataType)*8); - if(whichDestValue < numValues - 1) - { - expected = 0; - return true; - } - // Last item doesn't get and'ed on every bit, so we have to mask away - size_t numBits = 
threadCount - whichDestValue * (sizeof(HostDataType)*8); - expected = StartValue(); - for(size_t i = 0; i < numBits; i++) - expected &= ~((HostDataType)1 << i); - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchAnd(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(~(HostDataType)0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + cl_uint numBits = DataType().Size(deviceID) * 8; + + return (threadCount + numBits - 1) / numBits; + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" size_t numBits = sizeof(") + + DataType().RegularTypeName() + + ") * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - (whichResult * numBits);\n" + "\n" + " oldValues[tid] = atomic_fetch_and" + + postfix + "(&destMemory[whichResult], ~((" + + DataType().RegularTypeName() + ")1 << bitIndex) " + + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + size_t numBits = sizeof(HostDataType) * 8; + size_t whichResult = tid / numBits; + size_t bitIndex = tid - (whichResult * numBits); + + oldValues[tid] = host_atomic_fetch_and(&destMemory[whichResult], + ~((HostDataType)1 << bitIndex), + MemoryOrder()); + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + cl_uint numValues = (threadCount + (sizeof(HostDataType) * 8 - 1)) + / (sizeof(HostDataType) * 8); + if (whichDestValue < numValues - 1) + { + expected = 0; + return true; + } + // Last item doesn't get and'ed on every bit, so we 
have to mask away + size_t numBits = + threadCount - whichDestValue * (sizeof(HostDataType) * 8); + expected = StartValue(); + for (size_t i = 0; i < numBits; i++) + expected &= ~((HostDataType)1 << i); + return true; + } }; -int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_and_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchAnd test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - 
EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchAnd test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + 
test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_and_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFetchOrAnd : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchOrAnd + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::Iterations; - using CBasicTestMemOrderScope::IterationsStr; - CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return 1+(threadCount-1)/(DataType().Size(deviceID)*8); - } - // each thread modifies (with OR and AND operations) and verifies 
- // only one bit in atomic variable - // other bits are modified by other threads but it must not affect current thread operation - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+ - " size_t valueInd = tid/bits;\n" - " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n" - " oldValues[tid] = 0;\n" - " for(int i = 0; i < "+IterationsStr()+"; i++)\n" - " {\n" - " value = atomic_fetch_or"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n" - " if(value & bitMask) // bit should be set to 0\n" - " oldValues[tid]++;\n" - " value = atomic_fetch_and"+postfix+"(destMemory+valueInd, ~bitMask"+memoryOrderScope+");\n" - " if(!(value & bitMask)) // bit should be set to 1\n" - " oldValues[tid]++;\n" - " }\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - int bits = sizeof(HostDataType)*8; - size_t valueInd = tid/bits; - HostDataType value, bitMask = (HostDataType)1 << tid%bits; - oldValues[tid] = 0; - for(int i = 0; i < Iterations(); i++) - { - value = host_atomic_fetch_or(destMemory+valueInd, bitMask, MemoryOrder()); - if(value & bitMask) // bit should be set to 0 - oldValues[tid]++; - value = host_atomic_fetch_and(destMemory+valueInd, ~bitMask, MemoryOrder()); - if(!(value & bitMask)) // bit should be set to 1 - oldValues[tid]++; - } - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = 0; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - for(cl_uint i = 0; i < threadCount; i++) - { - if(refValues[i] > 0) - { - log_error("Thread %d 
found %d mismatch(es)\n", i, (cl_uint)refValues[i]); - correct = false; - } - } - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::Iterations; + using CBasicTestMemOrderScope::IterationsStr; + CBasicTestFetchOrAnd(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8); + } + // each thread modifies (with OR and AND operations) and verifies + // only one bit in atomic variable + // other bits are modified by other threads but it must not affect current + // thread operation + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return std::string(" int bits = sizeof(") + + DataType().RegularTypeName() + ")*8;\n" + + " size_t valueInd = tid/bits;\n" + " " + + DataType().RegularTypeName() + " value, bitMask = (" + + DataType().RegularTypeName() + + ")1 << tid%bits;\n" + " oldValues[tid] = 0;\n" + " for(int i = 0; i < " + + IterationsStr() + + "; i++)\n" + " {\n" + " value = atomic_fetch_or" + + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + + ");\n" + " if(value & bitMask) // bit should be set to 0\n" + " oldValues[tid]++;\n" + " value = atomic_fetch_and" + + postfix + "(destMemory+valueInd, ~bitMask" + memoryOrderScope + + ");\n" + " if(!(value & bitMask)) // bit should be set to 1\n" + " oldValues[tid]++;\n" + " }\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + int bits = sizeof(HostDataType) * 8; + size_t valueInd = tid / bits; + HostDataType value, bitMask = (HostDataType)1 << tid % bits; + oldValues[tid] = 0; + for (int i = 0; i < Iterations(); i++) + { + value = host_atomic_fetch_or(destMemory + valueInd, bitMask, + MemoryOrder()); + if (value & bitMask) // bit should be set to 0 + oldValues[tid]++; + value = host_atomic_fetch_and(destMemory + valueInd, ~bitMask, + MemoryOrder()); + if (!(value & bitMask)) // bit should be set to 1 + oldValues[tid]++; + } + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = 0; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + for (cl_uint i = 0; i < threadCount; i++) + { + if (refValues[i] > 0) + { + log_error("Thread %d found %d mismatch(es)\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return true; + } }; -int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, 
cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_orand_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchOrAnd test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchOrAnd test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchOrAnd test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, 
useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchOrAnd test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_uint( + TYPE_ATOMIC_UINT, useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_long( + TYPE_ATOMIC_LONG, useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchOrAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchOrAnd + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchOrAnd + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + 
EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_orand(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_orand_generic(deviceID, context, queue, + num_elements, false); } -int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_orand(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_orand_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_orand_generic(deviceID, context, queue, + num_elements, true); } -template -class CBasicTestFetchXor2 : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchXor2 + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::Iterations; - using CBasicTestMemOrderScope::IterationsStr; - CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return 1+(threadCount-1)/(DataType().Size(deviceID)*8); - } - // each thread modifies (with XOR operation) and verifies - // only one bit in atomic variable - // other bits are modified by other threads but it must not affect current thread operation - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); - return - std::string(" int bits = sizeof(")+DataType().RegularTypeName()+")*8;\n"+ - " size_t valueInd = tid/bits;\n" - " "+DataType().RegularTypeName()+" value, bitMask = ("+DataType().RegularTypeName()+")1 << tid%bits;\n" - " oldValues[tid] = 0;\n" - " for(int i = 0; i < "+IterationsStr()+"; i++)\n" - " {\n" - " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n" - " if(value & bitMask) // bit should be set to 0\n" - " oldValues[tid]++;\n" - " value = atomic_fetch_xor"+postfix+"(destMemory+valueInd, bitMask"+memoryOrderScope+");\n" - " if(!(value & bitMask)) // bit should be set to 1\n" - " oldValues[tid]++;\n" - " }\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - int bits = sizeof(HostDataType)*8; - size_t valueInd = tid/bits; - HostDataType value, bitMask = (HostDataType)1 << tid%bits; - oldValues[tid] = 0; - for(int i = 0; i < Iterations(); i++) - { - value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder()); - if(value & bitMask) // bit should be set to 0 - oldValues[tid]++; - value = host_atomic_fetch_xor(destMemory+valueInd, bitMask, MemoryOrder()); - if(!(value & bitMask)) // bit should be set to 1 - oldValues[tid]++; - } - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = 0; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - for(cl_uint i = 0; i < threadCount; i++) - { - if(refValues[i] > 0) - { - log_error("Thread %d found %d mismatches\n", i, (cl_uint)refValues[i]); - correct = false; - } - } - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using 
CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::Iterations; + using CBasicTestMemOrderScope::IterationsStr; + CBasicTestFetchXor2(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return 1 + (threadCount - 1) / (DataType().Size(deviceID) * 8); + } + // each thread modifies (with XOR operation) and verifies + // only one bit in atomic variable + // other bits are modified by other threads but it must not affect current + // thread operation + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return std::string(" int bits = sizeof(") + + DataType().RegularTypeName() + ")*8;\n" + + " size_t valueInd = tid/bits;\n" + " " + + DataType().RegularTypeName() + " value, bitMask = (" + + DataType().RegularTypeName() + + ")1 << tid%bits;\n" + " oldValues[tid] = 0;\n" + " for(int i = 0; i < " + + IterationsStr() + + "; i++)\n" + " {\n" + " value = atomic_fetch_xor" + + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + + ");\n" + " if(value & bitMask) // bit should be set to 0\n" + " oldValues[tid]++;\n" + " value = atomic_fetch_xor" + + postfix + "(destMemory+valueInd, bitMask" + memoryOrderScope + + ");\n" + " if(!(value & bitMask)) // bit should be set to 1\n" + " oldValues[tid]++;\n" + " }\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + int bits = sizeof(HostDataType) * 8; + size_t valueInd = tid / bits; + HostDataType value, bitMask = (HostDataType)1 << tid % bits; + oldValues[tid] = 0; + for (int i = 0; i < Iterations(); i++) + { + value = host_atomic_fetch_xor(destMemory + valueInd, bitMask, + MemoryOrder()); + if (value & bitMask) // bit should be set to 0 + oldValues[tid]++; + 
value = host_atomic_fetch_xor(destMemory + valueInd, bitMask, + MemoryOrder()); + if (!(value & bitMask)) // bit should be set to 1 + oldValues[tid]++; + } + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = 0; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + for (cl_uint i = 0; i < threadCount; i++) + { + if (refValues[i] > 0) + { + log_error("Thread %d found %d mismatches\n", i, + (cl_uint)refValues[i]); + correct = false; + } + } + return true; + } }; -int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_xor2_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchXor2 test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, 
test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchXor2 test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchXor2 test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchXor2 test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchXor2 + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, 
num_elements)); + CBasicTestFetchXor2 + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchXor2 + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchXor2 + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_xor2_generic(deviceID, context, queue, + num_elements, false); } -int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_xor2(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_xor2_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_xor2_generic(deviceID, context, queue, + num_elements, true); } -template -class CBasicTestFetchMin : public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchMin + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using 
CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(DataType().MaxValue()); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_min"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0; i < threadCount; i++) - { - startRefValues[i] = genrand_int32(d); - if(sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; - } - return true; - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - { - if(startRefValues[ i ] < expected) - expected = startRefValues[ i ]; - } - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchMin(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(DataType().MaxValue()); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? 
"" : "_explicit"); + return " oldValues[tid] = atomic_fetch_min" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_min(&destMemory[0], oldValues[tid], + MemoryOrder()); + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if (sizeof(HostDataType) >= 8) + startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + } + return true; + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] < expected) expected = startRefValues[i]; + } + return true; + } }; -int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_min_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchMin test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchMin test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, 
test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchMin test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMin test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchMin test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchMin + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + 
CBasicTestFetchMin + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchMin + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMin + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_min_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFetchMax 
: public CBasicTestMemOrderScope -{ +template +class CBasicTestFetchMax + : public CBasicTestMemOrderScope { public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(DataType().MinValue()); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - return - " oldValues[tid] = atomic_fetch_max"+postfix+"(&destMemory[0], oldValues[tid] "+memoryOrderScope+");\n"; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], MemoryOrder()); - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0; i < threadCount; i++) - { - startRefValues[i] = genrand_int32(d); - if(sizeof(HostDataType) >= 8) - startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; - } - return true; - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - for(cl_uint i = 0; i < threadCount; i++) - { - if(startRefValues[ i ] > expected) - expected = startRefValues[ i ]; - } - return true; - } + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + CBasicTestFetchMax(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(DataType().MinValue()); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); 
+ std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + return " oldValues[tid] = atomic_fetch_max" + postfix + + "(&destMemory[0], oldValues[tid] " + memoryOrderScope + ");\n"; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + oldValues[tid] = host_atomic_fetch_max(&destMemory[0], oldValues[tid], + MemoryOrder()); + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount; i++) + { + startRefValues[i] = genrand_int32(d); + if (sizeof(HostDataType) >= 8) + startRefValues[i] |= (HostDataType)genrand_int32(d) << 16; + } + return true; + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); + for (cl_uint i = 0; i < threadCount; i++) + { + if (startRefValues[i] > expected) expected = startRefValues[i]; + } + return true; + } }; -int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fetch_max_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFetchMax test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if(AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFetchMax test_intptr_t(TYPE_ATOMIC_INTPTR_T, 
useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFetchMax test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFetchMax test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFetchMax test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_ulong( + TYPE_ATOMIC_ULONG, useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFetchMax + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, 
num_elements)); + CBasicTestFetchMax + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFetchMax + test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFetchMax + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fetch_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fetch_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fetch_max_generic(deviceID, context, queue, num_elements, + true); } -template -class 
CBasicTestFlag : public CBasicTestMemOrderScope -{ - static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000; -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::OldValueCheck; - using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTestMemOrderScope::MemoryOrderScopeStr; - using CBasicTestMemOrderScope::UseSVM; - using CBasicTestMemOrderScope::LocalMemory; - CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - TExplicitMemoryOrderType MemoryOrderForClear() - { - // Memory ordering for atomic_flag_clear function - // ("shall not be memory_order_acquire nor memory_order_acq_rel") - if(MemoryOrder() == MEMORY_ORDER_ACQUIRE) - return MEMORY_ORDER_RELAXED; - if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) - return MEMORY_ORDER_RELEASE; - return MemoryOrder(); - } - std::string MemoryOrderScopeStrForClear() - { - std::string orderStr; - if (MemoryOrder() != MEMORY_ORDER_EMPTY) - orderStr = std::string(", ") + get_memory_order_type_name(MemoryOrderForClear()); - return orderStr + MemoryScopeStr(); - } - - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, - cl_command_queue queue) - { - // This test assumes support for the memory_scope_device scope in the case - // that LocalMemory() == false. Therefore we should skip this test in that - // configuration on a 3.0 driver since supporting the memory_scope_device - // scope is optionaly. 
- if (get_device_cl_version(deviceID) >= Version{ 3, 0 }) - { - if (!LocalMemory() - && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE)) - { - log_info( - "Skipping atomic_flag test due to use of atomic_scope_device " - "which is optionally not supported on this device\n"); - return 0; // skip test - not applicable - } - } - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, - context, - queue); - } - virtual std::string ProgramCore() - { - std::string memoryOrderScope = MemoryOrderScopeStr(); - std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); - std::string program = - " uint cnt, stop = 0;\n" - " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n" - " {\n" - " bool set = atomic_flag_test_and_set" + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n"; - if (MemoryOrder() == MEMORY_ORDER_RELAXED - || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory()) - program += " atomic_work_item_fence(" - + std::string(LocalMemory() - ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " - : "CLK_GLOBAL_MEM_FENCE, ") - + "memory_order_acquire," - + std::string(LocalMemory() - ? "memory_scope_work_group" - : (UseSVM() ? 
"memory_scope_all_svm_devices" - : "memory_scope_device")) - + ");\n"; - - program += - " if (!set)\n" - " {\n"; +template +class CBasicTestFlag + : public CBasicTestMemOrderScope { + static const HostDataType CRITICAL_SECTION_NOT_VISITED = 1000000000; - if (LocalMemory()) - program += " uint csIndex = get_enqueued_local_size(0)*get_group_id(0)+cnt;\n"; - else - program += " uint csIndex = cnt;\n"; - - std::ostringstream csNotVisited; - csNotVisited << CRITICAL_SECTION_NOT_VISITED; - program += - " // verify that thread is the first visitor\n" - " if(oldValues[csIndex] == "+csNotVisited.str()+")\n" - " {\n" - " oldValues[csIndex] = tid; // set the winner id for this critical section\n" - " stop = 1;\n" - " }\n"; - - if (MemoryOrder() == MEMORY_ORDER_ACQUIRE - || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory()) - program += " atomic_work_item_fence(" - + std::string(LocalMemory() - ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " - : "CLK_GLOBAL_MEM_FENCE, ") - + "memory_order_release," - + std::string(LocalMemory() - ? "memory_scope_work_group" - : (UseSVM() ? 
"memory_scope_all_svm_devices" - : "memory_scope_device")) - + ");\n"; +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::OldValueCheck; + using CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTestMemOrderScope::MemoryOrderScopeStr; + using CBasicTestMemOrderScope::UseSVM; + using CBasicTestMemOrderScope::LocalMemory; + CBasicTestFlag(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) + { + StartValue(0); + OldValueCheck(false); + } + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) + { + return threadCount; + } + TExplicitMemoryOrderType MemoryOrderForClear() + { + // Memory ordering for atomic_flag_clear function + // ("shall not be memory_order_acquire nor memory_order_acq_rel") + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE) return MEMORY_ORDER_RELAXED; + if (MemoryOrder() == MEMORY_ORDER_ACQ_REL) return MEMORY_ORDER_RELEASE; + return MemoryOrder(); + } + std::string MemoryOrderScopeStrForClear() + { + std::string orderStr; + if (MemoryOrder() != MEMORY_ORDER_EMPTY) + orderStr = std::string(", ") + + get_memory_order_type_name(MemoryOrderForClear()); + return orderStr + MemoryScopeStr(); + } - program += - " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + MemoryOrderScopeStrForClear() + ");\n" - " }\n" - " }\n"; - return program; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - cl_uint cnt, stop = 0; - for (cnt = 0; !stop && cnt < threadCount; cnt++) // each thread must find critical section where it is the first visitor\n" - { - if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder())) - { - cl_uint csIndex = cnt; - // verify that thread is the first visitor\n" - if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED) + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue 
queue) + { + // This test assumes support for the memory_scope_device scope in the + // case that LocalMemory() == false. Therefore we should skip this test + // in that configuration on a 3.0 driver since supporting the + // memory_scope_device scope is optionaly. + if (get_device_cl_version(deviceID) >= Version{ 3, 0 }) + { + if (!LocalMemory() + && !(gAtomicFenceCap & CL_DEVICE_ATOMIC_SCOPE_DEVICE)) + { + log_info("Skipping atomic_flag test due to use of " + "atomic_scope_device " + "which is optionally not supported on this device\n"); + return 0; // skip test - not applicable + } + } + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramCore() + { + std::string memoryOrderScope = MemoryOrderScopeStr(); + std::string postfix(memoryOrderScope.empty() ? "" : "_explicit"); + std::string program = + " uint cnt, stop = 0;\n" + " for(cnt = 0; !stop && cnt < threadCount; cnt++) // each thread " + "must find critical section where it is the first visitor\n" + " {\n" + " bool set = atomic_flag_test_and_set" + + postfix + "(&destMemory[cnt]" + memoryOrderScope + ");\n"; + if (MemoryOrder() == MEMORY_ORDER_RELAXED + || MemoryOrder() == MEMORY_ORDER_RELEASE || LocalMemory()) + program += " atomic_work_item_fence(" + + std::string( + LocalMemory() + ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " + : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_acquire," + + std::string(LocalMemory() + ? "memory_scope_work_group" + : (UseSVM() ? 
"memory_scope_all_svm_devices" + : "memory_scope_device")) + + ");\n"; + + program += " if (!set)\n" + " {\n"; + + if (LocalMemory()) + program += " uint csIndex = " + "get_enqueued_local_size(0)*get_group_id(0)+cnt;\n"; + else + program += " uint csIndex = cnt;\n"; + + std::ostringstream csNotVisited; + csNotVisited << CRITICAL_SECTION_NOT_VISITED; + program += " // verify that thread is the first visitor\n" + " if(oldValues[csIndex] == " + + csNotVisited.str() + + ")\n" + " {\n" + " oldValues[csIndex] = tid; // set the winner id for this " + "critical section\n" + " stop = 1;\n" + " }\n"; + + if (MemoryOrder() == MEMORY_ORDER_ACQUIRE + || MemoryOrder() == MEMORY_ORDER_RELAXED || LocalMemory()) + program += " atomic_work_item_fence(" + + std::string( + LocalMemory() + ? "CLK_LOCAL_MEM_FENCE | CLK_GLOBAL_MEM_FENCE, " + : "CLK_GLOBAL_MEM_FENCE, ") + + "memory_order_release," + + std::string(LocalMemory() + ? "memory_scope_work_group" + : (UseSVM() ? "memory_scope_all_svm_devices" + : "memory_scope_device")) + + ");\n"; + + program += " atomic_flag_clear" + postfix + "(&destMemory[cnt]" + + MemoryOrderScopeStrForClear() + + ");\n" + " }\n" + " }\n"; + return program; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + cl_uint cnt, stop = 0; + for (cnt = 0; !stop && cnt < threadCount; + cnt++) // each thread must find critical section where it is the + // first visitor\n" { - oldValues[csIndex] = tid; // set the winner id for this critical section\n" - stop = 1; + if (!host_atomic_flag_test_and_set(&destMemory[cnt], MemoryOrder())) + { + cl_uint csIndex = cnt; + // verify that thread is the first visitor\n" + if (oldValues[csIndex] == CRITICAL_SECTION_NOT_VISITED) + { + oldValues[csIndex] = + tid; // set the winner id for this critical section\n" + stop = 1; + } + host_atomic_flag_clear(&destMemory[cnt], MemoryOrderForClear()); + } } - host_atomic_flag_clear(&destMemory[cnt], 
MemoryOrderForClear()); - } - } - } - virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, HostDataType *startRefValues, cl_uint whichDestValue) - { - expected = StartValue(); - return true; - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0 ; i < threadCount; i++) - startRefValues[i] = CRITICAL_SECTION_NOT_VISITED; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - /* We are expecting unique values from 0 to threadCount-1 (each critical section must be visited) */ - /* These values must be distributed across refValues array */ - std::vector tidFound(threadCount); - cl_uint i; - - for (i = 0; i < threadCount; i++) - { - cl_uint value = (cl_uint)refValues[i]; - if (value == CRITICAL_SECTION_NOT_VISITED) - { - // Special initial value - log_error("ERROR: Critical section %u not visited\n", i); - correct = false; + } + virtual bool ExpectedValue(HostDataType &expected, cl_uint threadCount, + HostDataType *startRefValues, + cl_uint whichDestValue) + { + expected = StartValue(); return true; - } - if (value >= threadCount) - { - log_error("ERROR: Reference value %u outside of valid range! 
(%u)\n", i, value); - correct = false; + } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount; i++) + startRefValues[i] = CRITICAL_SECTION_NOT_VISITED; return true; - } - if (tidFound[value]) - { - log_error("ERROR: Value (%u) occurred more thane once\n", value); - correct = false; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + /* We are expecting unique values from 0 to threadCount-1 (each critical + * section must be visited) */ + /* These values must be distributed across refValues array */ + std::vector tidFound(threadCount); + cl_uint i; + + for (i = 0; i < threadCount; i++) + { + cl_uint value = (cl_uint)refValues[i]; + if (value == CRITICAL_SECTION_NOT_VISITED) + { + // Special initial value + log_error("ERROR: Critical section %u not visited\n", i); + correct = false; + return true; + } + if (value >= threadCount) + { + log_error( + "ERROR: Reference value %u outside of valid range! 
(%u)\n", + i, value); + correct = false; + return true; + } + if (tidFound[value]) + { + log_error("ERROR: Value (%u) occurred more thane once\n", + value); + correct = false; + return true; + } + tidFound[value] = true; + } return true; - } - tidFound[value] = true; } - return true; - } }; -int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_flag_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + bool useSVM) { - int error = 0; - CBasicTestFlag test_flag(TYPE_ATOMIC_FLAG, useSVM); - EXECUTE_TEST(error, test_flag.Execute(deviceID, context, queue, num_elements)); - return error; + int error = 0; + CBasicTestFlag test_flag(TYPE_ATOMIC_FLAG, + useSVM); + EXECUTE_TEST(error, + test_flag.Execute(deviceID, context, queue, num_elements)); + return error; } -int test_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_flag(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_flag_generic(deviceID, context, queue, num_elements, false); + return test_atomic_flag_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_flag(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_flag(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_flag_generic(deviceID, context, queue, num_elements, true); + return test_atomic_flag_generic(deviceID, context, queue, num_elements, + true); } -template -class CBasicTestFence : public CBasicTestMemOrderScope -{ - struct TestDefinition { - bool op1IsFence; - TExplicitMemoryOrderType op1MemOrder; - bool op2IsFence; - TExplicitMemoryOrderType op2MemOrder; - }; -public: - using CBasicTestMemOrderScope::StartValue; - using CBasicTestMemOrderScope::OldValueCheck; - 
using CBasicTestMemOrderScope::MemoryOrder; - using CBasicTestMemOrderScope::MemoryScope; - using CBasicTestMemOrderScope::MemoryScopeStr; - using CBasicTestMemOrderScope::DeclaredInProgram; - using CBasicTestMemOrderScope::UsedInFunction; - using CBasicTestMemOrderScope::DataType; - using CBasicTestMemOrderScope::CurrentGroupSize; - using CBasicTestMemOrderScope::UseSVM; - using CBasicTestMemOrderScope::LocalMemory; - using CBasicTestMemOrderScope::LocalRefValues; - CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) : CBasicTestMemOrderScope(dataType, useSVM) - { - StartValue(0); - OldValueCheck(false); - } - virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) - { - return threadCount; - } - virtual cl_uint NumNonAtomicVariablesPerThread() - { - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - return 1; - if (LocalMemory()) - { - if (gIsEmbedded) - { - if (CurrentGroupSize() > 1024) - CurrentGroupSize(1024); - return 1; //1KB of local memory required by spec. Clamp group size to 1k and allow 1 variable per thread - } - else - return 32 * 1024 / 8 / CurrentGroupSize() - 1; //32KB of local memory required by spec - } - return 256; - } - virtual std::string SingleTestName() - { - std::string testName; - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - testName += "seq_cst fence, "; - else - testName += std::string(get_memory_order_type_name(_subCase.op1MemOrder)).substr(sizeof("memory_order")) - + (_subCase.op1IsFence ? " fence" : " atomic") + " synchronizes-with " - + std::string(get_memory_order_type_name(_subCase.op2MemOrder)).substr(sizeof("memory_order")) - + (_subCase.op2IsFence ? 
" fence" : " atomic") + ", "; - testName += CBasicTest::SingleTestName(); - testName += std::string(", ") + std::string(get_memory_scope_type_name(MemoryScope())).substr(sizeof("memory")); - return testName; - } - virtual bool SVMDataBufferAllSVMConsistent() - { - // Although memory_scope_all_devices doesn't mention SVM it is just an - // alias for memory_scope_all_svm_devices. So both scopes interact with - // SVM allocations, on devices that support those, just the same. - return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES - || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES; - } - virtual int ExecuteForEachParameterSet(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - int error = 0; - // execute 3 (maximum) sub cases for each memory order - for (_subCaseId = 0; _subCaseId < 3; _subCaseId++) +template +class CBasicTestFence + : public CBasicTestMemOrderScope { + struct TestDefinition { - EXECUTE_TEST(error, (CBasicTestMemOrderScope::ExecuteForEachParameterSet(deviceID, context, queue))); - } - return error; - } - virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, cl_command_queue queue) - { - if(DeclaredInProgram() || UsedInFunction()) - return 0; //skip test - not applicable - no overloaded fence functions for different address spaces - if(MemoryOrder() == MEMORY_ORDER_EMPTY || - MemoryScope() == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since opencl20-openclc-rev15 - return 0; //skip test - not applicable - if((UseSVM() || gHost) - && LocalMemory()) - return 0; // skip test - not applicable for SVM and local memory - struct TestDefinition acqTests[] = { - // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder} - { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, - { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, - { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE } - }; - struct TestDefinition relTests[] = { - { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE }, - { true, 
MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL } - }; - struct TestDefinition arTests[] = { - { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }, - { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE }, - { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL } + bool op1IsFence; + TExplicitMemoryOrderType op1MemOrder; + bool op2IsFence; + TExplicitMemoryOrderType op2MemOrder; }; - switch (MemoryOrder()) - { - case MEMORY_ORDER_ACQUIRE: - if (_subCaseId >= sizeof(acqTests) / sizeof(struct TestDefinition)) - return 0; - _subCase = acqTests[_subCaseId]; - break; - case MEMORY_ORDER_RELEASE: - if (_subCaseId >= sizeof(relTests) / sizeof(struct TestDefinition)) - return 0; - _subCase = relTests[_subCaseId]; - break; - case MEMORY_ORDER_ACQ_REL: - if (_subCaseId >= sizeof(arTests) / sizeof(struct TestDefinition)) - return 0; - _subCase = arTests[_subCaseId]; - break; - case MEMORY_ORDER_SEQ_CST: - if (_subCaseId != 0) // one special case only - return 0; - break; - default: - return 0; - } - LocalRefValues(LocalMemory()); - return CBasicTestMemOrderScope::ExecuteSingleTest(deviceID, context, queue); - } - virtual std::string ProgramHeader(cl_uint maxNumDestItems) - { - std::string header; - if(gOldAPI) - { - if(MemoryScope() == MEMORY_SCOPE_EMPTY) - { - header += "#define atomic_work_item_fence(x,y) mem_fence(x)\n"; - } - else - { - header += "#define atomic_work_item_fence(x,y,z) mem_fence(x)\n"; - } - } - return header+CBasicTestMemOrderScope::ProgramHeader(maxNumDestItems); - } - virtual std::string ProgramCore() - { - std::ostringstream naValues; - naValues << NumNonAtomicVariablesPerThread(); - std::string program, fenceType, nonAtomic; - if (LocalMemory()) - { - program = " size_t myId = get_local_id(0), hisId = get_local_size(0)-1-myId;\n"; - fenceType = "CLK_LOCAL_MEM_FENCE"; - nonAtomic = "localValues"; - } - else + +public: + using CBasicTestMemOrderScope::StartValue; + using CBasicTestMemOrderScope::OldValueCheck; + using 
CBasicTestMemOrderScope::MemoryOrder; + using CBasicTestMemOrderScope::MemoryScope; + using CBasicTestMemOrderScope::MemoryScopeStr; + using CBasicTestMemOrderScope::DeclaredInProgram; + using CBasicTestMemOrderScope::UsedInFunction; + using CBasicTestMemOrderScope::DataType; + using CBasicTestMemOrderScope::CurrentGroupSize; + using CBasicTestMemOrderScope::UseSVM; + using CBasicTestMemOrderScope::LocalMemory; + using CBasicTestMemOrderScope::LocalRefValues; + CBasicTestFence(TExplicitAtomicType dataType, bool useSVM) + : CBasicTestMemOrderScope(dataType, + useSVM) { - program = " size_t myId = tid, hisId = threadCount-1-tid;\n"; - fenceType = "CLK_GLOBAL_MEM_FENCE"; - nonAtomic = "oldValues"; - } - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - { - // All threads are divided into pairs. - // Each thread has its own atomic variable and performs the following actions: - // - increments its own variable - // - performs fence operation to propagate its value and to see value from other thread - // - reads value from other thread's variable - // - repeats the above steps when both values are the same (and less than 1000000) - // - stores the last value read from other thread (in additional variable) - // At the end of execution at least one thread should know the last value from other thread - program += std::string("") + - " " + DataType().RegularTypeName() + " myValue = 0, hisValue; \n" - " do {\n" - " myValue++;\n" - " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n" - " atomic_work_item_fence(" + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + "); \n" - " hisValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n" - " } while(myValue == hisValue && myValue < 1000000);\n" - " " + nonAtomic + "[myId] = hisValue; \n"; + StartValue(0); + OldValueCheck(false); } - else + virtual cl_uint NumResults(cl_uint threadCount, cl_device_id deviceID) { - // Each thread modifies 
one of its non-atomic variables, increments value of its atomic variable - // and reads values from another thread in typical synchronizes-with scenario with: - // - non-atomic variable (at index A) modification (value change from 0 to A) - // - release operation (additional fence or within atomic) + atomic variable modification (value A) - // - atomic variable read (value B) + acquire operation (additional fence or within atomic) - // - non-atomic variable (at index B) read (value C) - // Each thread verifies dependency between atomic and non-atomic value read from another thread - // The following condition must be true: B == C - program += std::string("") + - " " + DataType().RegularTypeName() + " myValue = 0, hisAtomicValue, hisValue; \n" - " do {\n" - " myValue++;\n" - " " + nonAtomic + "[myId*" + naValues.str() +"+myValue] = myValue;\n"; - if (_subCase.op1IsFence) - program += std::string("") + - " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + "); \n" - " atomic_store_explicit(&destMemory[myId], myValue, memory_order_relaxed" + MemoryScopeStr() + ");\n"; - else - program += std::string("") + - " atomic_store_explicit(&destMemory[myId], myValue, " + get_memory_order_type_name(_subCase.op1MemOrder) + MemoryScopeStr() + ");\n"; - if (_subCase.op2IsFence) - program += std::string("") + - " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], memory_order_relaxed" + MemoryScopeStr() + ");\n" - " atomic_work_item_fence(" + fenceType + ", " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + "); \n"; - else - program += std::string("") + - " hisAtomicValue = atomic_load_explicit(&destMemory[hisId], " + get_memory_order_type_name(_subCase.op2MemOrder) + MemoryScopeStr() + ");\n"; - program += - " hisValue = " + nonAtomic + "[hisId*" + naValues.str() + "+hisAtomicValue]; \n"; - if (LocalMemory()) - program += " hisId = (hisId+1)%get_local_size(0);\n"; - else - program += " 
hisId = (hisId+1)%threadCount;\n"; - program += - " } while(hisAtomicValue == hisValue && myValue < "+naValues.str()+"-1);\n" - " if(hisAtomicValue != hisValue)\n" - " { // fail\n" - " atomic_store(&destMemory[myId], myValue-1);\n"; - if (LocalMemory()) - program += " hisId = (hisId+get_local_size(0)-1)%get_local_size(0);\n"; - else - program += " hisId = (hisId+threadCount-1)%threadCount;\n"; - program += - " if(myValue+1 < " + naValues.str() + ")\n" - " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+1] = hisId;\n" - " if(myValue+2 < " + naValues.str() + ")\n" - " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+2] = hisAtomicValue;\n" - " if(myValue+3 < " + naValues.str() + ")\n" - " " + nonAtomic + "[myId*" + naValues.str() + "+myValue+3] = hisValue;\n"; - if (gDebug) - { - program += - " printf(\"WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\\n\", tid, hisAtomicValue, hisId, hisValue);\n"; - } - program += - " }\n"; - } - return program; - } - virtual void HostFunction(cl_uint tid, cl_uint threadCount, volatile HostAtomicType *destMemory, HostDataType *oldValues) - { - size_t myId = tid, hisId = threadCount - 1 - tid; - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) - { - HostDataType myValue = 0, hisValue; - // CPU thread typically starts faster - wait for GPU thread - myValue++; - host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST); - while (host_atomic_load(&destMemory[hisId], MEMORY_ORDER_SEQ_CST) == 0); - do { - myValue++; - host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED); - host_atomic_thread_fence(MemoryOrder()); - hisValue = host_atomic_load(&destMemory[hisId], MEMORY_ORDER_RELAXED); - } while (myValue == hisValue && hisValue < 1000000); - oldValues[tid] = hisValue; + return threadCount; } - else + virtual cl_uint NumNonAtomicVariablesPerThread() { - HostDataType myValue = 0, hisAtomicValue, hisValue; - do { - myValue++; - oldValues[myId*NumNonAtomicVariablesPerThread()+myValue] 
= myValue; - if (_subCase.op1IsFence) + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) return 1; + if (LocalMemory()) { - host_atomic_thread_fence(_subCase.op1MemOrder); - host_atomic_store(&destMemory[myId], myValue, MEMORY_ORDER_RELAXED); + if (gIsEmbedded) + { + if (CurrentGroupSize() > 512) CurrentGroupSize(512); + return 2; // 1KB of local memory required by spec. Clamp group + // size to 512 and allow 2 variables per thread + } + else + return 32 * 1024 / 8 / CurrentGroupSize() + - 1; // 32KB of local memory required by spec } + return 256; + } + virtual std::string SingleTestName() + { + std::string testName; + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + testName += "seq_cst fence, "; else - host_atomic_store(&destMemory[myId], myValue, _subCase.op1MemOrder); - if (_subCase.op2IsFence) + testName += + std::string(get_memory_order_type_name(_subCase.op1MemOrder)) + .substr(sizeof("memory_order")) + + (_subCase.op1IsFence ? " fence" : " atomic") + + " synchronizes-with " + + std::string(get_memory_order_type_name(_subCase.op2MemOrder)) + .substr(sizeof("memory_order")) + + (_subCase.op2IsFence ? " fence" : " atomic") + ", "; + testName += CBasicTest::SingleTestName(); + testName += std::string(", ") + + std::string(get_memory_scope_type_name(MemoryScope())) + .substr(sizeof("memory")); + return testName; + } + virtual bool SVMDataBufferAllSVMConsistent() + { + // Although memory_scope_all_devices doesn't mention SVM it is just an + // alias for memory_scope_all_svm_devices. So both scopes interact with + // SVM allocations, on devices that support those, just the same. 
+ return MemoryScope() == MEMORY_SCOPE_ALL_DEVICES + || MemoryScope() == MEMORY_SCOPE_ALL_SVM_DEVICES; + } + virtual int ExecuteForEachParameterSet(cl_device_id deviceID, + cl_context context, + cl_command_queue queue) + { + int error = 0; + // execute 3 (maximum) sub cases for each memory order + for (_subCaseId = 0; _subCaseId < 3; _subCaseId++) { - hisAtomicValue = host_atomic_load(&destMemory[hisId], MEMORY_ORDER_RELAXED); - host_atomic_thread_fence(_subCase.op2MemOrder); + EXECUTE_TEST( + error, + (CBasicTestMemOrderScope:: + ExecuteForEachParameterSet(deviceID, context, queue))); } - else - hisAtomicValue = host_atomic_load(&destMemory[hisId], _subCase.op2MemOrder); - hisValue = oldValues[hisId*NumNonAtomicVariablesPerThread() + hisAtomicValue]; - hisId = (hisId + 1) % threadCount; - } while(hisAtomicValue == hisValue && myValue < (HostDataType)NumNonAtomicVariablesPerThread()-1); - if(hisAtomicValue != hisValue) - { // fail - host_atomic_store(&destMemory[myId], myValue-1, MEMORY_ORDER_SEQ_CST); - if (gDebug) + return error; + } + virtual int ExecuteSingleTest(cl_device_id deviceID, cl_context context, + cl_command_queue queue) + { + if (DeclaredInProgram() || UsedInFunction()) + return 0; // skip test - not applicable - no overloaded fence + // functions for different address spaces + if (MemoryOrder() == MEMORY_ORDER_EMPTY + || MemoryScope() + == MEMORY_SCOPE_EMPTY) // empty 'scope' not required since + // opencl20-openclc-rev15 + return 0; // skip test - not applicable + if ((UseSVM() || gHost) && LocalMemory()) + return 0; // skip test - not applicable for SVM and local memory + struct TestDefinition acqTests[] = { + // {op1IsFence, op1MemOrder, op2IsFence, op2MemOrder} + { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, + { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQUIRE }, + { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQUIRE } + }; + struct TestDefinition relTests[] = { + { true, MEMORY_ORDER_RELEASE, false, MEMORY_ORDER_ACQUIRE 
}, + { true, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL } + }; + struct TestDefinition arTests[] = { + { false, MEMORY_ORDER_RELEASE, true, MEMORY_ORDER_ACQ_REL }, + { true, MEMORY_ORDER_ACQ_REL, false, MEMORY_ORDER_ACQUIRE }, + { true, MEMORY_ORDER_ACQ_REL, true, MEMORY_ORDER_ACQ_REL } + }; + switch (MemoryOrder()) { - hisId = (hisId + threadCount - 1) % threadCount; - printf("WI %d: atomic value (%d) at index %d is different than non-atomic value (%d)\n", tid, hisAtomicValue, hisId, hisValue); + case MEMORY_ORDER_ACQUIRE: + if (_subCaseId + >= sizeof(acqTests) / sizeof(struct TestDefinition)) + return 0; + _subCase = acqTests[_subCaseId]; + break; + case MEMORY_ORDER_RELEASE: + if (_subCaseId + >= sizeof(relTests) / sizeof(struct TestDefinition)) + return 0; + _subCase = relTests[_subCaseId]; + break; + case MEMORY_ORDER_ACQ_REL: + if (_subCaseId + >= sizeof(arTests) / sizeof(struct TestDefinition)) + return 0; + _subCase = arTests[_subCaseId]; + break; + case MEMORY_ORDER_SEQ_CST: + if (_subCaseId != 0) // one special case only + return 0; + break; + default: return 0; } - } - } - } - virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, MTdata d) - { - for(cl_uint i = 0 ; i < threadCount*NumNonAtomicVariablesPerThread(); i++) - startRefValues[i] = 0; - return true; - } - virtual bool VerifyRefs(bool &correct, cl_uint threadCount, HostDataType *refValues, HostAtomicType *finalValues) - { - correct = true; - cl_uint workSize = LocalMemory() ? 
CurrentGroupSize() : threadCount; - for(cl_uint workOffset = 0; workOffset < threadCount; workOffset+= workSize) - { - if(workOffset+workSize > threadCount) - // last workgroup (host threads) - workSize = threadCount-workOffset; - for(cl_uint i = 0 ; i < workSize && workOffset+i < threadCount; i++) - { - HostAtomicType myValue = finalValues[workOffset + i]; - if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + LocalRefValues(LocalMemory()); + return CBasicTestMemOrderScope< + HostAtomicType, HostDataType>::ExecuteSingleTest(deviceID, context, + queue); + } + virtual std::string ProgramHeader(cl_uint maxNumDestItems) + { + std::string header; + if (gOldAPI) { - HostDataType hisValue = refValues[workOffset + i]; - if (myValue == hisValue) - { - // a draw - both threads should reach final value 1000000 - if (myValue != 1000000) + if (MemoryScope() == MEMORY_SCOPE_EMPTY) { - log_error("ERROR: Invalid reference value #%u (%d instead of 1000000)\n", workOffset + i, myValue); - correct = false; - return true; + header += "#define atomic_work_item_fence(x,y) " + " mem_fence(x)\n"; } - } - else - { - //slower thread (in total order of seq_cst operations) must know last value written by faster thread - HostAtomicType hisRealValue = finalValues[workOffset + workSize - 1 - i]; - HostDataType myValueReadByHim = refValues[workOffset + workSize - 1 - i]; - - // who is the winner? 
- thread with lower private counter value - if (myValue == hisRealValue) // forbidden result - fence doesn't work + else { - log_error("ERROR: Atomic counter values #%u and #%u are the same (%u)\n", workOffset + i, workOffset + workSize - 1 - i, myValue); - log_error("ERROR: Both threads have outdated values read from another thread (%u and %u)\n", hisValue, myValueReadByHim); - correct = false; - return true; + header += "#define atomic_work_item_fence(x,y,z) " + " mem_fence(x)\n"; } - if (myValue > hisRealValue) // I'm slower + } + return header + + CBasicTestMemOrderScope:: + ProgramHeader(maxNumDestItems); + } + virtual std::string ProgramCore() + { + std::ostringstream naValues; + naValues << NumNonAtomicVariablesPerThread(); + std::string program, fenceType, nonAtomic; + if (LocalMemory()) + { + program = " size_t myId = get_local_id(0), hisId = " + "get_local_size(0)-1-myId;\n"; + fenceType = "CLK_LOCAL_MEM_FENCE"; + nonAtomic = "localValues"; + } + else + { + program = " size_t myId = tid, hisId = threadCount-1-tid;\n"; + fenceType = "CLK_GLOBAL_MEM_FENCE"; + nonAtomic = "oldValues"; + } + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + { + // All threads are divided into pairs. 
+ // Each thread has its own atomic variable and performs the + // following actions: + // - increments its own variable + // - performs fence operation to propagate its value and to see + // value from other thread + // - reads value from other thread's variable + // - repeats the above steps when both values are the same (and less + // than 1000000) + // - stores the last value read from other thread (in additional + // variable) At the end of execution at least one thread should know + // the last value from other thread + program += std::string("") + " " + DataType().RegularTypeName() + + " myValue = 0, hisValue; \n" + " do {\n" + " myValue++;\n" + " atomic_store_explicit(&destMemory[myId], myValue, " + "memory_order_relaxed" + + MemoryScopeStr() + + ");\n" + " atomic_work_item_fence(" + + fenceType + ", memory_order_seq_cst" + MemoryScopeStr() + + "); \n" + " hisValue = atomic_load_explicit(&destMemory[hisId], " + "memory_order_relaxed" + + MemoryScopeStr() + + ");\n" + " } while(myValue == hisValue && myValue < 1000000);\n" + " " + + nonAtomic + "[myId] = hisValue; \n"; + } + else + { + // Each thread modifies one of its non-atomic variables, increments + // value of its atomic variable and reads values from another thread + // in typical synchronizes-with scenario with: + // - non-atomic variable (at index A) modification (value change + // from 0 to A) + // - release operation (additional fence or within atomic) + atomic + // variable modification (value A) + // - atomic variable read (value B) + acquire operation (additional + // fence or within atomic) + // - non-atomic variable (at index B) read (value C) + // Each thread verifies dependency between atomic and non-atomic + // value read from another thread The following condition must be + // true: B == C + program += std::string("") + " " + DataType().RegularTypeName() + + " myValue = 0, hisAtomicValue, hisValue; \n" + " do {\n" + " myValue++;\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + 
"+myValue] = myValue;\n"; + if (_subCase.op1IsFence) + program += std::string("") + " atomic_work_item_fence(" + + fenceType + ", " + + get_memory_order_type_name(_subCase.op1MemOrder) + + MemoryScopeStr() + + "); \n" + " atomic_store_explicit(&destMemory[myId], myValue, " + "memory_order_relaxed" + + MemoryScopeStr() + ");\n"; + else + program += std::string("") + + " atomic_store_explicit(&destMemory[myId], myValue, " + + get_memory_order_type_name(_subCase.op1MemOrder) + + MemoryScopeStr() + ");\n"; + if (_subCase.op2IsFence) + program += std::string("") + + " hisAtomicValue = " + "atomic_load_explicit(&destMemory[hisId], " + "memory_order_relaxed" + + MemoryScopeStr() + + ");\n" + " atomic_work_item_fence(" + + fenceType + ", " + + get_memory_order_type_name(_subCase.op2MemOrder) + + MemoryScopeStr() + "); \n"; + else + program += std::string("") + + " hisAtomicValue = " + "atomic_load_explicit(&destMemory[hisId], " + + get_memory_order_type_name(_subCase.op2MemOrder) + + MemoryScopeStr() + ");\n"; + program += " hisValue = " + nonAtomic + "[hisId*" + + naValues.str() + "+hisAtomicValue]; \n"; + if (LocalMemory()) + program += " hisId = (hisId+1)%get_local_size(0);\n"; + else + program += " hisId = (hisId+1)%threadCount;\n"; + program += " } while(hisAtomicValue == hisValue && myValue < " + + naValues.str() + + "-1);\n" + " if(hisAtomicValue != hisValue)\n" + " { // fail\n" + " atomic_store(&destMemory[myId], myValue-1);\n"; + if (LocalMemory()) + program += " hisId = " + "(hisId+get_local_size(0)-1)%get_local_size(0);\n"; + else + program += " hisId = (hisId+threadCount-1)%threadCount;\n"; + program += " if(myValue+1 < " + naValues.str() + + ")\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + "+myValue+1] = hisId;\n" + " if(myValue+2 < " + + naValues.str() + + ")\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + "+myValue+2] = hisAtomicValue;\n" + " if(myValue+3 < " + + naValues.str() + + ")\n" + " " + + nonAtomic + "[myId*" + naValues.str() + + 
"+myValue+3] = hisValue;\n"; + if (gDebug) { - if (hisRealValue != hisValue) - { - log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + i, hisValue, hisRealValue); - log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + i, workOffset + workSize - 1 - i); - correct = false; - return true; - } + program += " printf(\"WI %d: atomic value (%d) at index %d " + "is different than non-atomic value (%d)\\n\", tid, " + "hisAtomicValue, hisId, hisValue);\n"; } - else // I'm faster + program += " }\n"; + } + return program; + } + virtual void HostFunction(cl_uint tid, cl_uint threadCount, + volatile HostAtomicType *destMemory, + HostDataType *oldValues) + { + size_t myId = tid, hisId = threadCount - 1 - tid; + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + { + HostDataType myValue = 0, hisValue; + // CPU thread typically starts faster - wait for GPU thread + myValue++; + host_atomic_store( + &destMemory[myId], myValue, MEMORY_ORDER_SEQ_CST); + while (host_atomic_load( + &destMemory[hisId], MEMORY_ORDER_SEQ_CST) + == 0) + ; + do { - if (myValueReadByHim != myValue) - { - log_error("ERROR: Invalid reference value #%u (%d instead of %d)\n", workOffset + workSize - 1 - i, myValueReadByHim, myValue); - log_error("ERROR: Slower thread #%u should know value written by faster thread #%u\n", workOffset + workSize - 1 - i, workOffset + i); - correct = false; - return true; - } - } - } + myValue++; + host_atomic_store( + &destMemory[myId], myValue, MEMORY_ORDER_RELAXED); + host_atomic_thread_fence(MemoryOrder()); + hisValue = host_atomic_load( + &destMemory[hisId], MEMORY_ORDER_RELAXED); + } while (myValue == hisValue && hisValue < 1000000); + oldValues[tid] = hisValue; } else { - if (myValue != NumNonAtomicVariablesPerThread()-1) - { - log_error("ERROR: Invalid atomic value #%u (%d instead of %d)\n", workOffset + i, myValue, NumNonAtomicVariablesPerThread()-1); - log_error("ERROR: Thread #%u observed invalid 
values in other thread's variables\n", workOffset + i, myValue); - correct = false; - return true; - } + HostDataType myValue = 0, hisAtomicValue, hisValue; + do + { + myValue++; + oldValues[myId * NumNonAtomicVariablesPerThread() + myValue] = + myValue; + if (_subCase.op1IsFence) + { + host_atomic_thread_fence(_subCase.op1MemOrder); + host_atomic_store( + &destMemory[myId], myValue, MEMORY_ORDER_RELAXED); + } + else + host_atomic_store( + &destMemory[myId], myValue, _subCase.op1MemOrder); + if (_subCase.op2IsFence) + { + hisAtomicValue = + host_atomic_load( + &destMemory[hisId], MEMORY_ORDER_RELAXED); + host_atomic_thread_fence(_subCase.op2MemOrder); + } + else + hisAtomicValue = + host_atomic_load( + &destMemory[hisId], _subCase.op2MemOrder); + hisValue = oldValues[hisId * NumNonAtomicVariablesPerThread() + + hisAtomicValue]; + hisId = (hisId + 1) % threadCount; + } while (hisAtomicValue == hisValue + && myValue + < (HostDataType)NumNonAtomicVariablesPerThread() - 1); + if (hisAtomicValue != hisValue) + { // fail + host_atomic_store( + &destMemory[myId], myValue - 1, MEMORY_ORDER_SEQ_CST); + if (gDebug) + { + hisId = (hisId + threadCount - 1) % threadCount; + printf("WI %d: atomic value (%d) at index %d is different " + "than non-atomic value (%d)\n", + tid, hisAtomicValue, hisId, hisValue); + } + } } - } } - return true; - } + virtual bool GenerateRefs(cl_uint threadCount, HostDataType *startRefValues, + MTdata d) + { + for (cl_uint i = 0; i < threadCount * NumNonAtomicVariablesPerThread(); + i++) + startRefValues[i] = 0; + return true; + } + virtual bool VerifyRefs(bool &correct, cl_uint threadCount, + HostDataType *refValues, + HostAtomicType *finalValues) + { + correct = true; + cl_uint workSize = LocalMemory() ? 
CurrentGroupSize() : threadCount; + for (cl_uint workOffset = 0; workOffset < threadCount; + workOffset += workSize) + { + if (workOffset + workSize > threadCount) + // last workgroup (host threads) + workSize = threadCount - workOffset; + for (cl_uint i = 0; i < workSize && workOffset + i < threadCount; + i++) + { + HostAtomicType myValue = finalValues[workOffset + i]; + if (MemoryOrder() == MEMORY_ORDER_SEQ_CST) + { + HostDataType hisValue = refValues[workOffset + i]; + if (myValue == hisValue) + { + // a draw - both threads should reach final value + // 1000000 + if (myValue != 1000000) + { + log_error("ERROR: Invalid reference value #%u (%d " + "instead of 1000000)\n", + workOffset + i, myValue); + correct = false; + return true; + } + } + else + { + // slower thread (in total order of seq_cst operations) + // must know last value written by faster thread + HostAtomicType hisRealValue = + finalValues[workOffset + workSize - 1 - i]; + HostDataType myValueReadByHim = + refValues[workOffset + workSize - 1 - i]; + + // who is the winner? 
- thread with lower private + // counter value + if (myValue == hisRealValue) // forbidden result - fence + // doesn't work + { + log_error("ERROR: Atomic counter values #%u and " + "#%u are the same (%u)\n", + workOffset + i, + workOffset + workSize - 1 - i, myValue); + log_error( + "ERROR: Both threads have outdated values read " + "from another thread (%u and %u)\n", + hisValue, myValueReadByHim); + correct = false; + return true; + } + if (myValue > hisRealValue) // I'm slower + { + if (hisRealValue != hisValue) + { + log_error("ERROR: Invalid reference value #%u " + "(%d instead of %d)\n", + workOffset + i, hisValue, + hisRealValue); + log_error( + "ERROR: Slower thread #%u should know " + "value written by faster thread #%u\n", + workOffset + i, + workOffset + workSize - 1 - i); + correct = false; + return true; + } + } + else // I'm faster + { + if (myValueReadByHim != myValue) + { + log_error("ERROR: Invalid reference value #%u " + "(%d instead of %d)\n", + workOffset + workSize - 1 - i, + myValueReadByHim, myValue); + log_error( + "ERROR: Slower thread #%u should know " + "value written by faster thread #%u\n", + workOffset + workSize - 1 - i, + workOffset + i); + correct = false; + return true; + } + } + } + } + else + { + if (myValue != NumNonAtomicVariablesPerThread() - 1) + { + log_error("ERROR: Invalid atomic value #%u (%d instead " + "of %d)\n", + workOffset + i, myValue, + NumNonAtomicVariablesPerThread() - 1); + log_error("ERROR: Thread #%u observed invalid values " + "in other thread's variables\n", + workOffset + i, myValue); + correct = false; + return true; + } + } + } + } + return true; + } + private: - int _subCaseId; - struct TestDefinition _subCase; + int _subCaseId; + struct TestDefinition _subCase; }; -int test_atomic_fence_generic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, bool useSVM) +int test_atomic_fence_generic(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int 
num_elements, + bool useSVM) { - int error = 0; - CBasicTestFence test_int(TYPE_ATOMIC_INT, useSVM); - EXECUTE_TEST(error, test_int.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_uint(TYPE_ATOMIC_UINT, useSVM); - EXECUTE_TEST(error, test_uint.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_long(TYPE_ATOMIC_LONG, useSVM); - EXECUTE_TEST(error, test_long.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_ulong(TYPE_ATOMIC_ULONG, useSVM); - EXECUTE_TEST(error, test_ulong.Execute(deviceID, context, queue, num_elements)); - if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) - { - CBasicTestFence test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - else - { - CBasicTestFence test_intptr_t(TYPE_ATOMIC_INTPTR_T, useSVM); - EXECUTE_TEST(error, test_intptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); - EXECUTE_TEST(error, test_uintptr_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_size_t(TYPE_ATOMIC_SIZE_T, useSVM); - EXECUTE_TEST(error, test_size_t.Execute(deviceID, context, queue, num_elements)); - CBasicTestFence test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); - EXECUTE_TEST(error, test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); - } - return error; + int error = 0; + CBasicTestFence test_int(TYPE_ATOMIC_INT, + useSVM); + EXECUTE_TEST(error, + 
test_int.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_uint(TYPE_ATOMIC_UINT, + useSVM); + EXECUTE_TEST(error, + test_uint.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_long(TYPE_ATOMIC_LONG, + useSVM); + EXECUTE_TEST(error, + test_long.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_ulong(TYPE_ATOMIC_ULONG, + useSVM); + EXECUTE_TEST(error, + test_ulong.Execute(deviceID, context, queue, num_elements)); + if (AtomicTypeInfo(TYPE_ATOMIC_SIZE_T).Size(deviceID) == 4) + { + CBasicTestFence test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + else + { + CBasicTestFence test_intptr_t( + TYPE_ATOMIC_INTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_intptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_uintptr_t(TYPE_ATOMIC_UINTPTR_T, useSVM); + EXECUTE_TEST( + error, + test_uintptr_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence test_size_t( + TYPE_ATOMIC_SIZE_T, useSVM); + EXECUTE_TEST( + error, test_size_t.Execute(deviceID, context, queue, num_elements)); + CBasicTestFence + test_ptrdiff_t(TYPE_ATOMIC_PTRDIFF_T, useSVM); + EXECUTE_TEST( + error, + test_ptrdiff_t.Execute(deviceID, context, queue, num_elements)); + } + return error; } -int test_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_fence(cl_device_id deviceID, 
cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fence_generic(deviceID, context, queue, num_elements, false); + return test_atomic_fence_generic(deviceID, context, queue, num_elements, + false); } -int test_svm_atomic_fence(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_svm_atomic_fence(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - return test_atomic_fence_generic(deviceID, context, queue, num_elements, true); + return test_atomic_fence_generic(deviceID, context, queue, num_elements, + true); } -- cgit v1.2.3 From fec9d9a238dd38af18c7d606ef0340786917053e Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Wed, 7 Sep 2022 17:28:29 +0100 Subject: [NFC] Fix whitespace issues in run_conformance.py (#1491) Fix whitespace issues and remove superfluous parens in the run_conformance.py script. This addresses 288 out of the 415 issues reported by pylint. Signed-off-by: Stuart Brady --- test_conformance/run_conformance.py | 584 ++++++++++++++++++------------------ 1 file changed, 296 insertions(+), 288 deletions(-) diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py index ea7f6775..52c91697 100755 --- a/test_conformance/run_conformance.py +++ b/test_conformance/run_conformance.py @@ -8,295 +8,303 @@ #// #******************************************************************/ -import os, re, sys, subprocess, time, commands, tempfile, math, string +import os +import re +import sys +import subprocess +import time +import commands +import tempfile +import math +import string DEBUG = 0 -log_file_name = "opencl_conformance_results_" + time.strftime("%Y-%m-%d_%H-%M", time.localtime())+ ".log" +log_file_name = "opencl_conformance_results_" + time.strftime("%Y-%m-%d_%H-%M", time.localtime()) + ".log" process_pid = 0 # The amount of time between printing a "." 
(if no output from test) or ":" (if output) # to the screen while the tests are running. -seconds_between_status_updates = 60*60*24*7 # effectively never +seconds_between_status_updates = 60 * 60 * 24 * 7 # effectively never + # Help info -def write_help_info() : - print("run_conformance.py test_list [CL_DEVICE_TYPE(s) to test] [partial-test-names, ...] [log=path/to/log/file/]") - print(" test_list - the .csv file containing the test names and commands to run the tests.") - print(" [partial-test-names, ...] - optional partial strings to select a subset of the tests to run.") - print(" [CL_DEVICE_TYPE(s) to test] - list of CL device types to test, default is CL_DEVICE_TYPE_DEFAULT.") - print(" [log=path/to/log/file/] - provide a path for the test log file, default is in the current directory.") - print(" (Note: spaces are not allowed in the log file path.") +def write_help_info(): + print("run_conformance.py test_list [CL_DEVICE_TYPE(s) to test] [partial-test-names, ...] [log=path/to/log/file/]") + print(" test_list - the .csv file containing the test names and commands to run the tests.") + print(" [partial-test-names, ...] 
- optional partial strings to select a subset of the tests to run.") + print(" [CL_DEVICE_TYPE(s) to test] - list of CL device types to test, default is CL_DEVICE_TYPE_DEFAULT.") + print(" [log=path/to/log/file/] - provide a path for the test log file, default is in the current directory.") + print(" (Note: spaces are not allowed in the log file path.") # Get the time formatted nicely -def get_time() : - return time.strftime("%d-%b %H:%M:%S", time.localtime()) +def get_time(): + return time.strftime("%d-%b %H:%M:%S", time.localtime()) + # Write text to the screen and the log file -def write_screen_log(text) : - global log_file - print(text) - log_file.write(text+"\n") +def write_screen_log(text): + global log_file + print(text) + log_file.write(text + "\n") + # Load the tests from a csv formated file of the form name,command def get_tests(filename, devices_to_test): - tests = [] - if (os.path.exists(filename) == False): - print("FAILED: test_list \"" + filename + "\" does not exist.") - print("") - write_help_info() - sys.exit(-1) - file = open(filename, 'r') - for line in file.readlines(): - comment = re.search("^#.*", line) - if (comment): - continue - device_specific_match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*$", line) - if (device_specific_match): - if (device_specific_match.group(1) in devices_to_test): - test_path = string.replace(device_specific_match.group(3), '/', os.sep) - test_name = string.replace(device_specific_match.group(2), '/', os.sep) - tests.append((test_name, test_path)) - else: - print("Skipping " + device_specific_match.group(2) + " because " + device_specific_match.group(1) + " is not in the list of devices to test.") - continue - match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*$", line) - if (match): - test_path = string.replace(match.group(2), '/', os.sep) - test_name = string.replace(match.group(1), '/', os.sep) - tests.append((test_name, test_path)) - return tests + tests = [] + if os.path.exists(filename) == False: + 
print("FAILED: test_list \"" + filename + "\" does not exist.") + print("") + write_help_info() + sys.exit(-1) + file = open(filename, 'r') + for line in file.readlines(): + comment = re.search("^#.*", line) + if comment: + continue + device_specific_match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*$", line) + if device_specific_match: + if device_specific_match.group(1) in devices_to_test: + test_path = string.replace(device_specific_match.group(3), '/', os.sep) + test_name = string.replace(device_specific_match.group(2), '/', os.sep) + tests.append((test_name, test_path)) + else: + print("Skipping " + device_specific_match.group(2) + " because " + device_specific_match.group(1) + " is not in the list of devices to test.") + continue + match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*$", line) + if match: + test_path = string.replace(match.group(2), '/', os.sep) + test_name = string.replace(match.group(1), '/', os.sep) + tests.append((test_name, test_path)) + return tests def run_test_checking_output(current_directory, test_dir, log_file): - global process_pid, seconds_between_status_updates - failures_this_run = 0 - start_time = time.time() - # Create a temporary file for capturing the output from the test - (output_fd, output_name) = tempfile.mkstemp() - if ( not os.path.exists(output_name)) : - write_screen_log("\n ==> ERROR: could not create temporary file %s ." 
% output_name) - os.close(output_fd) - return -1 - # Execute the test - program_to_run = test_dir_without_args = test_dir.split(None, 1)[0] - if ( os.sep == '\\' ) : program_to_run += ".exe" - if (os.path.exists(current_directory + os.sep + program_to_run)) : - os.chdir(os.path.dirname(current_directory+os.sep+test_dir_without_args) ) - try: - if (DEBUG): p = subprocess.Popen("", stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True) - else : p = subprocess.Popen(current_directory + os.sep + test_dir, stderr=output_fd, stdout=output_fd, shell=True) - except OSError: - write_screen_log("\n ==> ERROR: failed to execute test. Failing test. : " + str(OSError)) - os.close(output_fd) - return -1 - else: - write_screen_log("\n ==> ERROR: test file (" + current_directory + os.sep + program_to_run +") does not exist. Failing test.") - os.close(output_fd) - return -1 - # Set the global pid so we can kill it if this is aborted - process_pid = p.pid - # Read one character at a time from the temporary output file while the process is running. - # When we get an end-of-line, look for errors and write the results to the log file. - # This allows us to process the file as it is being produced. 
- # Keep track of the state for reading - # Whether we are done, if we have more to read, and where in the file we last read - done = False - more_to_read = True - pointer = 0 - pointer_at_last_user_update = 0 - output_this_run = False - try: - read_output = open(output_name, 'r') - except IOError: - write_screen_log("\n ==> ERROR: could not open output file from test.") - os.close(output_fd) - return -1 - line = "" - while (not done or more_to_read): - os.fsync(output_fd) - # Determine if we should display some output - elapsed_time = (time.time() - start_time) - if (elapsed_time > seconds_between_status_updates): - start_time = time.time() - # If we've received output from the test since the last update, display a # - if (pointer != pointer_at_last_user_update): - sys.stdout.write(":") - else: - sys.stdout.write(".") - pointer_at_last_user_update = pointer - sys.stdout.flush() - # Check if we're done - p.poll() - if (not done and p.returncode != None): - if (p.returncode < 0): - if (not output_this_run): - print "" - output_this_run = True - write_screen_log(" ==> ERROR: test killed/crashed: " + str(p.returncode)+ ".") - done = True - # Try reading + global process_pid, seconds_between_status_updates + failures_this_run = 0 + start_time = time.time() + # Create a temporary file for capturing the output from the test + (output_fd, output_name) = tempfile.mkstemp() + if not os.path.exists(output_name): + write_screen_log("\n ==> ERROR: could not create temporary file %s ." 
% output_name) + os.close(output_fd) + return -1 + # Execute the test + program_to_run = test_dir_without_args = test_dir.split(None, 1)[0] + if os.sep == '\\': + program_to_run += ".exe" + if os.path.exists(current_directory + os.sep + program_to_run): + os.chdir(os.path.dirname(current_directory + os.sep + test_dir_without_args)) + try: + if DEBUG: p = subprocess.Popen("", stderr=subprocess.STDOUT, stdout=subprocess.PIPE, shell=True) + else: p = subprocess.Popen(current_directory + os.sep + test_dir, stderr=output_fd, stdout=output_fd, shell=True) + except OSError: + write_screen_log("\n ==> ERROR: failed to execute test. Failing test. : " + str(OSError)) + os.close(output_fd) + return -1 + else: + write_screen_log("\n ==> ERROR: test file (" + current_directory + os.sep + program_to_run + ") does not exist. Failing test.") + os.close(output_fd) + return -1 + # Set the global pid so we can kill it if this is aborted + process_pid = p.pid + # Read one character at a time from the temporary output file while the process is running. + # When we get an end-of-line, look for errors and write the results to the log file. + # This allows us to process the file as it is being produced. 
+ # Keep track of the state for reading + # Whether we are done, if we have more to read, and where in the file we last read + done = False + more_to_read = True + pointer = 0 + pointer_at_last_user_update = 0 + output_this_run = False try: - read_output.seek(pointer) - char_read = read_output.read(1) - except IOError: - time.sleep(1) - continue - # If we got a full line then process it - if (char_read == "\n"): - # Look for failures and report them as such - match = re.search(".*(FAILED|ERROR).*", line) - if (match): - if (not output_this_run): - print "" - output_this_run = True - print(" ==> " + line.replace('\n','')) - match = re.search(".*FAILED.*", line) - if (match): - failures_this_run = failures_this_run + 1 - match = re.search(".*(PASSED).*", line) - if (match): - if (not output_this_run): - print "" - output_this_run = True - print(" " + line.replace('\n','')) - # Write it to the log - log_file.write(" " + line +"\n") - log_file.flush() - line = "" - pointer = pointer + 1 - # If we are at the end of the file, then re-open it to get new data - elif (char_read == ""): - more_to_read = False - read_output.close() - time.sleep(1) - try: - os.fsync(output_fd) read_output = open(output_name, 'r') - # See if there is more to read. This happens if the process ends and we have data left. - read_output.seek(pointer) - if (read_output.read(1) != ""): - more_to_read = True - except IOError: - write_screen_log("\n ==> ERROR: could not reopen output file from test.") + except IOError: + write_screen_log("\n ==> ERROR: could not open output file from test.") + os.close(output_fd) return -1 - done = True - else: - line = line + char_read - pointer = pointer + 1 - # Now we are done, so write out any remaining data in the file: - # This should only happen if the process exited with an error. 
- os.fsync(output_fd) - while (read_output.read(1) != ""): - log_file.write(read_output.read(1)) - # Return the total number of failures - if (p.returncode == 0 and failures_this_run > 0): - write_screen_log("\n ==> ERROR: Test returned 0, but number of FAILED lines reported is " + str(failures_this_run) +".") - return failures_this_run - return p.returncode - - -def run_tests(tests) : - global curent_directory - global process_pid - # Run the tests - failures = 0 - previous_test = None - test_number = 1 - for test in tests: - # Print the name of the test we're running and the time - (test_name, test_dir) = test - if (test_dir != previous_test): - print("========== " + test_dir) - log_file.write("========================================================================================\n") - log_file.write("========================================================================================\n") - log_file.write("(" + get_time() + ") Running Tests: " + test_dir +"\n") - log_file.write("========================================================================================\n") - log_file.write("========================================================================================\n") - previous_test = test_dir - print("("+get_time()+") BEGIN " + test_name.ljust(40) +": "), - log_file.write(" ----------------------------------------------------------------------------------------\n") - log_file.write(" (" + get_time() + ") Running Sub Test: " + test_name + "\n") - log_file.write(" ----------------------------------------------------------------------------------------\n") - log_file.flush() - sys.stdout.flush() - - # Run the test - result = 0 - start_time = time.time() - try: - process_pid = 0 - result = run_test_checking_output(current_directory, test_dir, log_file) - except KeyboardInterrupt: - # Catch an interrupt from the user - write_screen_log("\nFAILED: Execution interrupted. 
Killing test process, but not aborting full test run.") - os.kill(process_pid, 9) - answer = raw_input("Abort all tests? (y/n)") - if (answer.find("y") != -1): - write_screen_log("\nUser chose to abort all tests.") - log_file.close() - sys.exit(-1) - else: - write_screen_log("\nUser chose to continue with other tests. Reporting this test as failed.") - result = 1 - run_time = (time.time() - start_time) - - # Move print the finish status - if (result == 0): - print("("+get_time()+") PASSED " + test_name.ljust(40) +": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) +")"), - else: - print("("+get_time()+") FAILED " + test_name.ljust(40) +": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) +")"), - - test_number = test_number + 1 - log_file.write(" ----------------------------------------------------------------------------------------\n") - log_file.flush() - - print("") - if (result != 0): - log_file.write(" *******************************************************************************************\n") - log_file.write(" * ("+get_time()+") Test " + test_name + " ==> FAILED: " + str(result)+"\n") - log_file.write(" *******************************************************************************************\n") - failures = failures + 1 - else: - log_file.write(" ("+get_time()+") Test " + test_name +" passed in " + str(run_time) + "s\n") - - log_file.write(" ----------------------------------------------------------------------------------------\n") - log_file.write("\n") - return failures - - - + line = "" + while not done or more_to_read: + os.fsync(output_fd) + # Determine if we should display some output + elapsed_time = (time.time() - start_time) + if elapsed_time > seconds_between_status_updates: + start_time = time.time() + # If we've received output from the test since the last update, display a # + if pointer != pointer_at_last_user_update: + 
sys.stdout.write(":") + else: + sys.stdout.write(".") + pointer_at_last_user_update = pointer + sys.stdout.flush() + # Check if we're done + p.poll() + if not done and p.returncode != None: + if p.returncode < 0: + if not output_this_run: + print "" + output_this_run = True + write_screen_log(" ==> ERROR: test killed/crashed: " + str(p.returncode) + ".") + done = True + # Try reading + try: + read_output.seek(pointer) + char_read = read_output.read(1) + except IOError: + time.sleep(1) + continue + # If we got a full line then process it + if char_read == "\n": + # Look for failures and report them as such + match = re.search(".*(FAILED|ERROR).*", line) + if match: + if not output_this_run: + print "" + output_this_run = True + print(" ==> " + line.replace('\n', '')) + match = re.search(".*FAILED.*", line) + if match: + failures_this_run = failures_this_run + 1 + match = re.search(".*(PASSED).*", line) + if match: + if not output_this_run: + print "" + output_this_run = True + print(" " + line.replace('\n', '')) + # Write it to the log + log_file.write(" " + line + "\n") + log_file.flush() + line = "" + pointer = pointer + 1 + # If we are at the end of the file, then re-open it to get new data + elif char_read == "": + more_to_read = False + read_output.close() + time.sleep(1) + try: + os.fsync(output_fd) + read_output = open(output_name, 'r') + # See if there is more to read. This happens if the process ends and we have data left. + read_output.seek(pointer) + if read_output.read(1) != "": + more_to_read = True + except IOError: + write_screen_log("\n ==> ERROR: could not reopen output file from test.") + return -1 + done = True + else: + line = line + char_read + pointer = pointer + 1 + # Now we are done, so write out any remaining data in the file: + # This should only happen if the process exited with an error. 
+ os.fsync(output_fd) + while read_output.read(1) != "": + log_file.write(read_output.read(1)) + # Return the total number of failures + if (p.returncode == 0 and failures_this_run > 0): + write_screen_log("\n ==> ERROR: Test returned 0, but number of FAILED lines reported is " + str(failures_this_run) + ".") + return failures_this_run + return p.returncode + + +def run_tests(tests): + global curent_directory + global process_pid + # Run the tests + failures = 0 + previous_test = None + test_number = 1 + for test in tests: + # Print the name of the test we're running and the time + (test_name, test_dir) = test + if test_dir != previous_test: + print("========== " + test_dir) + log_file.write("========================================================================================\n") + log_file.write("========================================================================================\n") + log_file.write("(" + get_time() + ") Running Tests: " + test_dir + "\n") + log_file.write("========================================================================================\n") + log_file.write("========================================================================================\n") + previous_test = test_dir + print("(" + get_time() + ") BEGIN " + test_name.ljust(40) + ": "), + log_file.write(" ----------------------------------------------------------------------------------------\n") + log_file.write(" (" + get_time() + ") Running Sub Test: " + test_name + "\n") + log_file.write(" ----------------------------------------------------------------------------------------\n") + log_file.flush() + sys.stdout.flush() + + # Run the test + result = 0 + start_time = time.time() + try: + process_pid = 0 + result = run_test_checking_output(current_directory, test_dir, log_file) + except KeyboardInterrupt: + # Catch an interrupt from the user + write_screen_log("\nFAILED: Execution interrupted. 
Killing test process, but not aborting full test run.") + os.kill(process_pid, 9) + answer = raw_input("Abort all tests? (y/n)") + if answer.find("y") != -1: + write_screen_log("\nUser chose to abort all tests.") + log_file.close() + sys.exit(-1) + else: + write_screen_log("\nUser chose to continue with other tests. Reporting this test as failed.") + result = 1 + run_time = (time.time() - start_time) + + # Move print the finish status + if result == 0: + print("(" + get_time() + ") PASSED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")"), + else: + print("(" + get_time() + ") FAILED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")"), + + test_number = test_number + 1 + log_file.write(" ----------------------------------------------------------------------------------------\n") + log_file.flush() + + print("") + if result != 0: + log_file.write(" *******************************************************************************************\n") + log_file.write(" * (" + get_time() + ") Test " + test_name + " ==> FAILED: " + str(result) + "\n") + log_file.write(" *******************************************************************************************\n") + failures = failures + 1 + else: + log_file.write(" (" + get_time() + ") Test " + test_name + " passed in " + str(run_time) + "s\n") + + log_file.write(" ----------------------------------------------------------------------------------------\n") + log_file.write("\n") + return failures # ######################## # Begin OpenCL conformance run script # ######################## -if (len(sys.argv) < 2): - write_help_info() - sys.exit(-1) - +if len(sys.argv) < 2: + write_help_info() + sys.exit(-1) current_directory = os.getcwd() # Open the log file for arg in sys.argv: - match = re.search("log=(\S+)", arg) - if (match): - log_file_name = 
match.group(1).rstrip('/') + os.sep + log_file_name + match = re.search("log=(\S+)", arg) + if match: + log_file_name = match.group(1).rstrip('/') + os.sep + log_file_name try: - log_file = open(log_file_name, "w") + log_file = open(log_file_name, "w") except IOError: - print "Could not open log file " + log_file_name + print "Could not open log file " + log_file_name # Determine which devices to test device_types = ["CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU", "CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_ALL"] devices_to_test = [] for device in device_types: - if device in sys.argv[2:]: - devices_to_test.append(device) -if (len(devices_to_test) == 0): - devices_to_test = ["CL_DEVICE_TYPE_DEFAULT"] + if device in sys.argv[2:]: + devices_to_test.append(device) +if len(devices_to_test) == 0: + devices_to_test = ["CL_DEVICE_TYPE_DEFAULT"] write_screen_log("Testing on: " + str(devices_to_test)) # Get the tests @@ -306,52 +314,52 @@ tests = get_tests(sys.argv[1], devices_to_test) tests_to_use = [] num_of_patterns_to_match = 0 for arg in sys.argv[2:]: - if arg in device_types: - continue - if re.search("log=(\S+)", arg): - continue - num_of_patterns_to_match = num_of_patterns_to_match + 1 - found_it = False - for test in tests: - (test_name, test_dir) = test - if (test_name.find(arg) != -1 or test_dir.find(arg) != -1): - found_it = True - if (test not in tests_to_use): - tests_to_use.append(test) - if (found_it == False): - print("Failed to find a test matching " + arg) -if (len(tests_to_use) == 0): - if (num_of_patterns_to_match > 0): - print("FAILED: Failed to find any tests matching the given command-line options.") - print("") - write_help_info() - sys.exit(-1) + if arg in device_types: + continue + if re.search("log=(\S+)", arg): + continue + num_of_patterns_to_match = num_of_patterns_to_match + 1 + found_it = False + for test in tests: + (test_name, test_dir) = test + if (test_name.find(arg) != -1 or test_dir.find(arg) != -1): + found_it = 
True + if test not in tests_to_use: + tests_to_use.append(test) + if found_it == False: + print("Failed to find a test matching " + arg) +if len(tests_to_use) == 0: + if num_of_patterns_to_match > 0: + print("FAILED: Failed to find any tests matching the given command-line options.") + print("") + write_help_info() + sys.exit(-1) else: - tests = tests_to_use[:] + tests = tests_to_use[:] write_screen_log("Test execution arguments: " + str(sys.argv)) -write_screen_log("Logging to file " + log_file_name +".") +write_screen_log("Logging to file " + log_file_name + ".") write_screen_log("Loaded tests from " + sys.argv[1] + ", total of " + str(len(tests)) + " tests selected to run:") for (test_name, test_command) in tests: - write_screen_log(test_name.ljust(50) + " (" + test_command +")") + write_screen_log(test_name.ljust(50) + " (" + test_command + ")") # Run the tests total_failures = 0 for device_to_test in devices_to_test: - os.environ['CL_DEVICE_TYPE'] = device_to_test - write_screen_log("========================================================================================") - write_screen_log("========================================================================================") - write_screen_log(("Setting CL_DEVICE_TYPE to " + device_to_test).center(90)) - write_screen_log("========================================================================================") - write_screen_log("========================================================================================") - failures = run_tests(tests) - write_screen_log("========================================================================================") - if (failures == 0): - write_screen_log(">> TEST on " + device_to_test + " PASSED") - else: - write_screen_log(">> TEST on " + device_to_test + " FAILED (" + str(failures) + " FAILURES)") - write_screen_log("========================================================================================") - total_failures = total_failures + failures - 
-write_screen_log("("+get_time()+") Testing complete. " + str(total_failures) + " failures for " + str(len(tests)) + " tests.") + os.environ['CL_DEVICE_TYPE'] = device_to_test + write_screen_log("========================================================================================") + write_screen_log("========================================================================================") + write_screen_log(("Setting CL_DEVICE_TYPE to " + device_to_test).center(90)) + write_screen_log("========================================================================================") + write_screen_log("========================================================================================") + failures = run_tests(tests) + write_screen_log("========================================================================================") + if failures == 0: + write_screen_log(">> TEST on " + device_to_test + " PASSED") + else: + write_screen_log(">> TEST on " + device_to_test + " FAILED (" + str(failures) + " FAILURES)") + write_screen_log("========================================================================================") + total_failures = total_failures + failures + +write_screen_log("(" + get_time() + ") Testing complete. " + str(total_failures) + " failures for " + str(len(tests)) + " tests.") log_file.close() -- cgit v1.2.3 From 6554c4901825381ee0d4d8ba199a66afff941a1a Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 8 Sep 2022 12:54:36 +0100 Subject: [NFCI] Remove unused variables and enable -Wunused-variable (#1483) Remove unused variables throughout the code base and enable the `-Wunused-variable` warning flag globally to prevent new unused variable issues being introduced in the future. This is mostly a non-functional change, with one exception: - In `test_conformance/api/test_kernel_arg_info.cpp`, an error check of the clGetDeviceInfo return value was added. 
Signed-off-by: Sven van Haastregt --- CMakeLists.txt | 1 + test_common/gl/helpers.cpp | 1 - test_conformance/SVM/test_byte_granularity.cpp | 1 - test_conformance/SVM/test_migrate.cpp | 3 --- test_conformance/api/test_api_min_max.cpp | 3 --- test_conformance/api/test_kernel_arg_info.cpp | 5 +---- test_conformance/api/test_mem_object_info.cpp | 2 -- test_conformance/api/test_null_buffer_arg.cpp | 1 - test_conformance/api/test_queries.cpp | 20 -------------------- test_conformance/api/test_sub_group_dispatch.cpp | 4 +--- test_conformance/basic/test_fpmath_float.cpp | 2 -- test_conformance/basic/test_hiloeo.cpp | 2 -- test_conformance/basic/test_hostptr.cpp | 2 -- test_conformance/basic/test_preprocessors.cpp | 2 +- test_conformance/basic/test_progvar.cpp | 2 -- test_conformance/basic/test_queue_priority.cpp | 6 ------ test_conformance/basic/test_readimage3d.cpp | 2 +- test_conformance/buffers/test_buffer_migrate.cpp | 2 +- test_conformance/buffers/test_image_migrate.cpp | 1 - .../test_compiler_defines_for_extensions.cpp | 2 -- test_conformance/computeinfo/main.cpp | 6 ------ test_conformance/conversions/fplib.cpp | 4 ---- test_conformance/conversions/test_conversions.cpp | 2 -- test_conformance/events/test_callbacks.cpp | 1 - test_conformance/events/test_events.cpp | 4 ---- test_conformance/gl/common.h | 7 ++++++- test_conformance/gl/test_image_methods.cpp | 2 -- test_conformance/gl/test_images_write_common.cpp | 4 ---- test_conformance/half/Test_vStoreHalf.cpp | 2 +- .../images/clReadWriteImage/test_read_1D.cpp | 1 - .../images/clReadWriteImage/test_read_1D_array.cpp | 1 - .../images/clReadWriteImage/test_read_2D.cpp | 1 - .../images/clReadWriteImage/test_read_2D_array.cpp | 3 +-- .../images/clReadWriteImage/test_read_3D.cpp | 1 - .../images/kernel_read_write/CMakeLists.txt | 10 ++++++++++ test_conformance/math_brute_force/reference_math.cpp | 4 ++-- test_conformance/pipes/test_pipe_limits.cpp | 4 ++-- test_conformance/pipes/test_pipe_read_write.cpp | 1 - 
test_conformance/printf/test_printf.cpp | 4 +--- test_conformance/printf/util_printf.cpp | 2 -- test_conformance/select/test_select.cpp | 2 -- test_conformance/spir/run_services.cpp | 1 - test_conformance/spirv_new/main.cpp | 1 - .../test_cl_khr_spirv_no_integer_wrap_decoration.cpp | 1 - test_conformance/spirv_new/test_op_fmath.cpp | 3 --- test_conformance/spirv_new/test_op_function.cpp | 1 - test_conformance/spirv_new/test_op_negate.cpp | 1 - test_conformance/spirv_new/test_op_opaque.cpp | 1 - .../spirv_new/test_op_vector_times_scalar.cpp | 2 -- 49 files changed, 29 insertions(+), 112 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fe56d0fa..b7c86ba1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,6 +89,7 @@ endmacro(add_cxx_flag_if_supported) if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") add_cxx_flag_if_supported(-Wmisleading-indentation) + add_cxx_flag_if_supported(-Wunused-variable) add_cxx_flag_if_supported(-Wno-narrowing) add_cxx_flag_if_supported(-Wno-format) add_cxx_flag_if_supported(-Werror) diff --git a/test_common/gl/helpers.cpp b/test_common/gl/helpers.cpp index def78d75..b9f95a94 100644 --- a/test_common/gl/helpers.cpp +++ b/test_common/gl/helpers.cpp @@ -1381,7 +1381,6 @@ void * CreateGLTexture2DArrayMultisample(size_t width, size_t height, //calculating colors double color_delta = 1.0 / (total_layers * samples); - double color = color_delta; if (attachment != GL_DEPTH_ATTACHMENT && attachment != GL_DEPTH_STENCIL_ATTACHMENT) { glDisable(GL_DEPTH_TEST); diff --git a/test_conformance/SVM/test_byte_granularity.cpp b/test_conformance/SVM/test_byte_granularity.cpp index 403528b9..6dbb3649 100644 --- a/test_conformance/SVM/test_byte_granularity.cpp +++ b/test_conformance/SVM/test_byte_granularity.cpp @@ -58,7 +58,6 @@ int test_svm_byte_granularity(cl_device_id deviceID, cl_context c, cl_command_qu cl_uint num_devices = 0; cl_int err = CL_SUCCESS; - cl_int rval = CL_SUCCESS; err = 
create_cl_objects(deviceID, &byte_manipulation_kernels[0], &context, &program, &queues[0], &num_devices, CL_DEVICE_SVM_FINE_GRAIN_BUFFER); if(err == 1) return 0; // no devices capable of requested SVM level, so don't execute but count test as passing. diff --git a/test_conformance/SVM/test_migrate.cpp b/test_conformance/SVM/test_migrate.cpp index 2a1ce051..f624bcd9 100644 --- a/test_conformance/SVM/test_migrate.cpp +++ b/test_conformance/SVM/test_migrate.cpp @@ -78,9 +78,6 @@ int test_svm_migrate(cl_device_id deviceID, cl_context c, cl_command_queue queue cl_uint amem[GLOBAL_SIZE]; cl_uint bmem[GLOBAL_SIZE]; cl_uint cmem[GLOBAL_SIZE]; - cl_uint ramem[GLOBAL_SIZE]; - cl_uint rbmem[GLOBAL_SIZE]; - cl_uint rcmem[GLOBAL_SIZE]; cl_event evs[20]; const size_t global_size = GLOBAL_SIZE; diff --git a/test_conformance/api/test_api_min_max.cpp b/test_conformance/api/test_api_min_max.cpp index 9e08b16d..086008d7 100644 --- a/test_conformance/api/test_api_min_max.cpp +++ b/test_conformance/api/test_api_min_max.cpp @@ -665,8 +665,6 @@ int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_image_format image_format_desc; cl_ulong maxAllocSize; cl_uint minRequiredDimension; - size_t length; - PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) @@ -746,7 +744,6 @@ int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_image_format image_format_desc; cl_ulong maxAllocSize; cl_uint minRequiredDimension; - size_t length; PASSIVE_REQUIRE_IMAGE_SUPPORT(deviceID) diff --git a/test_conformance/api/test_kernel_arg_info.cpp b/test_conformance/api/test_kernel_arg_info.cpp index 28825f10..d0681dfd 100644 --- a/test_conformance/api/test_kernel_arg_info.cpp +++ b/test_conformance/api/test_kernel_arg_info.cpp @@ -22,11 +22,8 @@ #define MINIMUM_OPENCL_PIPE_VERSION Version(2, 0) -static constexpr size_t CL_VERSION_LENGTH = 128; static constexpr size_t KERNEL_ARGUMENT_LENGTH = 128; static constexpr char KERNEL_ARGUMENT_NAME[] = "argument"; -static constexpr 
size_t KERNEL_ARGUMENT_NAME_LENGTH = - sizeof(KERNEL_ARGUMENT_NAME) + 1; static constexpr int SINGLE_KERNEL_ARG_NUMBER = 0; static constexpr int MAX_NUMBER_OF_KERNEL_ARGS = 128; @@ -183,7 +180,6 @@ static std::string generate_kernel(const std::vector& all_args, ret += "kernel void get_kernel_arg_info(\n"; for (int i = 0; i < all_args.size(); ++i) { - const KernelArgInfo& arg = all_args[i]; ret += generate_argument(all_args[i]); if (i == all_args.size() - 1) { @@ -542,6 +538,7 @@ size_t get_param_size(const std::string& arg_type, cl_device_id deviceID, cl_int err = clGetDeviceInfo(deviceID, CL_DEVICE_ADDRESS_BITS, sizeof(device_address_bits), &device_address_bits, NULL); + test_error_ret(err, "clGetDeviceInfo", 0); return (device_address_bits / 8); } diff --git a/test_conformance/api/test_mem_object_info.cpp b/test_conformance/api/test_mem_object_info.cpp index 2afe0437..8dc8f6cf 100644 --- a/test_conformance/api/test_mem_object_info.cpp +++ b/test_conformance/api/test_mem_object_info.cpp @@ -363,8 +363,6 @@ int test_get_imageObject_info( cl_mem * image, cl_mem_flags objectFlags, cl_imag cl_mem_flags flags; cl_uint mapCount; cl_uint refCount; - size_t rowPitchMultiplier; - size_t slicePitchMultiplier; cl_context otherCtx; size_t offset; size_t sz; diff --git a/test_conformance/api/test_null_buffer_arg.cpp b/test_conformance/api/test_null_buffer_arg.cpp index d412d4ea..75bdd479 100644 --- a/test_conformance/api/test_null_buffer_arg.cpp +++ b/test_conformance/api/test_null_buffer_arg.cpp @@ -149,7 +149,6 @@ int test_null_buffer_arg(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { unsigned int test_success = 0; - unsigned int i; unsigned int buffer_size; cl_int status; cl_program program; diff --git a/test_conformance/api/test_queries.cpp b/test_conformance/api/test_queries.cpp index 30b5706f..a7703a76 100644 --- a/test_conformance/api/test_queries.cpp +++ b/test_conformance/api/test_queries.cpp @@ -526,26 +526,6 @@ void CL_CALLBACK 
mem_obj_destructor_callback( cl_mem, void *data ) free( data ); } -// All possible combinations of valid cl_mem_flags. -static cl_mem_flags all_flags[16] = { - 0, - CL_MEM_READ_WRITE, - CL_MEM_READ_ONLY, - CL_MEM_WRITE_ONLY, - CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, - CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, - CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, - CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, - CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, - CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, - CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, - CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR, - CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, - CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, - CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, -}; - #define TEST_DEVICE_PARAM( device, paramName, val, name, type, cast ) \ error = clGetDeviceInfo( device, paramName, sizeof( val ), &val, &size ); \ test_error( error, "Unable to get device " name ); \ diff --git a/test_conformance/api/test_sub_group_dispatch.cpp b/test_conformance/api/test_sub_group_dispatch.cpp index 01d0ffa3..61d9a524 100644 --- a/test_conformance/api/test_sub_group_dispatch.cpp +++ b/test_conformance/api/test_sub_group_dispatch.cpp @@ -56,11 +56,9 @@ cl_int get_sub_group_num(cl_command_queue queue, cl_kernel kernel, clMemWrapper& int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { - static const size_t gsize0 = 80; - int i, error; + int error; size_t realSize; size_t kernel_max_subgroup_size, kernel_subgroup_count; - size_t global[] = {1,1,1}; size_t max_local; cl_platform_id platform; diff --git a/test_conformance/basic/test_fpmath_float.cpp b/test_conformance/basic/test_fpmath_float.cpp index 6e5deb4b..60d509b0 100644 --- a/test_conformance/basic/test_fpmath_float.cpp +++ b/test_conformance/basic/test_fpmath_float.cpp @@ -49,8 +49,6 @@ static const char 
*fpmul_kernel_code = "}\n"; -static const float MAX_ERR = 1e-5f; - static int verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n) { diff --git a/test_conformance/basic/test_hiloeo.cpp b/test_conformance/basic/test_hiloeo.cpp index 4cdf2ac7..3470ad00 100644 --- a/test_conformance/basic/test_hiloeo.cpp +++ b/test_conformance/basic/test_hiloeo.cpp @@ -43,8 +43,6 @@ static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4}; // input type name is strcat(gentype, vector_size_names[i]); // and output type name is // strcat(gentype, vector_size_names[out_vector_idx[i]]); -static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4, - -1,-1,-1,-1,-1,-1,-1,5}; static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"}; static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 }; diff --git a/test_conformance/basic/test_hostptr.cpp b/test_conformance/basic/test_hostptr.cpp index 65af5c3c..dee78675 100644 --- a/test_conformance/basic/test_hostptr.cpp +++ b/test_conformance/basic/test_hostptr.cpp @@ -32,8 +32,6 @@ const char *hostptr_kernel_code = " dst[tid] = srcA[tid] + srcB[tid];\n" "}\n"; -static const float MAX_ERR = 1e-5f; - static int verify_hostptr(cl_float *inptrA, cl_float *inptrB, cl_float *outptr, int n) { cl_float r; diff --git a/test_conformance/basic/test_preprocessors.cpp b/test_conformance/basic/test_preprocessors.cpp index 2038d150..e67487eb 100644 --- a/test_conformance/basic/test_preprocessors.cpp +++ b/test_conformance/basic/test_preprocessors.cpp @@ -97,10 +97,10 @@ int test_kernel_preprocessor_macros(cl_device_id deviceID, cl_context context, c char programSource[4096]; char curFileName[512]; char *programPtr = programSource; - int i = 0; snprintf(curFileName, 512, "%s", __FILE__); #ifdef _WIN32 // Replace "\" with "\\" + int i = 0; while(curFileName[i] != '\0') { if (curFileName[i] == '\\') { int j = i + 1; diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index 
62c0a6be..c0ad870a 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -1642,8 +1642,6 @@ int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_ "supported on this device\n"); return TEST_SKIPPED_ITSELF; } - size_t max_size = 0; - size_t pref_size = 0; cl_int err = CL_SUCCESS; diff --git a/test_conformance/basic/test_queue_priority.cpp b/test_conformance/basic/test_queue_priority.cpp index 57ce5041..ff6283cd 100644 --- a/test_conformance/basic/test_queue_priority.cpp +++ b/test_conformance/basic/test_queue_priority.cpp @@ -48,13 +48,9 @@ static const char *fpmul_kernel_code = " dst[tid] = srcA[tid] * srcB[tid];\n" "}\n"; - -static const float MAX_ERR = 1e-5f; - static int verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum) { - float r; int i; float * reference_ptr = (float *)malloc(n * sizeof(float)); @@ -82,7 +78,6 @@ verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum) static int verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n, int fileNum) { - float r; int i; float * reference_ptr = (float *)malloc(n * sizeof(float)); @@ -110,7 +105,6 @@ verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n, int fileNum) static int verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n, int fileNum) { - float r; int i; float * reference_ptr = (float *)malloc(n * sizeof(float)); diff --git a/test_conformance/basic/test_readimage3d.cpp b/test_conformance/basic/test_readimage3d.cpp index 1337c9fb..5fd7d109 100644 --- a/test_conformance/basic/test_readimage3d.cpp +++ b/test_conformance/basic/test_readimage3d.cpp @@ -142,7 +142,7 @@ int test_readimage3d(cl_device_id device, cl_context context, cl_command_queue q int img_width = 64; int img_height = 64; int img_depth = 64; - int i, err; + int err; size_t origin[3] = {0, 0, 0}; size_t region[3] = {img_width, img_height, img_depth}; size_t length = img_width * img_height * 
img_depth * 4 * sizeof(float); diff --git a/test_conformance/buffers/test_buffer_migrate.cpp b/test_conformance/buffers/test_buffer_migrate.cpp index f3098366..6cdc271b 100644 --- a/test_conformance/buffers/test_buffer_migrate.cpp +++ b/test_conformance/buffers/test_buffer_migrate.cpp @@ -80,7 +80,7 @@ static cl_int migrateMemObject(enum migrations migrate, cl_command_queue *queues static cl_int restoreBuffer(cl_command_queue *queues, cl_mem *buffers, cl_uint num_devices, cl_mem_migration_flags *flags, cl_uint *buffer) { - cl_uint i, j; + cl_uint i; cl_int err; // If the buffer was previously migrated with undefined content, reload the content. diff --git a/test_conformance/buffers/test_image_migrate.cpp b/test_conformance/buffers/test_image_migrate.cpp index dbdca9cc..6c8acdce 100644 --- a/test_conformance/buffers/test_image_migrate.cpp +++ b/test_conformance/buffers/test_image_migrate.cpp @@ -128,7 +128,6 @@ int test_image_migrate(cl_device_id deviceID, cl_context context, cl_command_que cl_mem_migration_flags *flagsA, *flagsB, *flagsC; cl_device_partition_property property[] = {CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, 0, 0}; cl_mem *imageA, *imageB, *imageC; - cl_mem_flags flags; cl_image_format format; cl_sampler sampler = NULL; cl_program program = NULL; diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 91441416..94657d61 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -361,8 +361,6 @@ int test_compiler_defines_for_extensions(cl_device_id device, cl_context context clProgramWrapper program; clKernelWrapper kernel; - Version version = get_device_cl_version(device); - error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&kernel_code, "test"); test_error(error, "create_single_kernel_helper failed"); diff --git 
a/test_conformance/computeinfo/main.cpp b/test_conformance/computeinfo/main.cpp index 03bdb2c1..382cd6a3 100644 --- a/test_conformance/computeinfo/main.cpp +++ b/test_conformance/computeinfo/main.cpp @@ -908,12 +908,6 @@ void dumpConfigInfo(config_info* info) { cl_name_version new_version_item = info->config.cl_name_version_array[f]; - cl_version new_version_major = - CL_VERSION_MAJOR_KHR(new_version_item.version); - cl_version new_version_minor = - CL_VERSION_MINOR_KHR(new_version_item.version); - cl_version new_version_patch = - CL_VERSION_PATCH_KHR(new_version_item.version); log_info("\t\t\"%s\" %d.%d.%d\n", new_version_item.name, CL_VERSION_MAJOR_KHR(new_version_item.version), CL_VERSION_MINOR_KHR(new_version_item.version), diff --git a/test_conformance/conversions/fplib.cpp b/test_conformance/conversions/fplib.cpp index e739b9ae..3b19b56d 100644 --- a/test_conformance/conversions/fplib.cpp +++ b/test_conformance/conversions/fplib.cpp @@ -79,7 +79,6 @@ float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd) uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = (uint64_t)data >> mantShift; - uint64_t mask = (1 << mantShift) - 1; if ((temp << mantShift) != data) inExact = 1; mantissa = (uint32_t)temp; @@ -124,7 +123,6 @@ float qcom_s64_2_f32(int64_t data, bool sat, roundingMode rnd) uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = (uint64_t)data >> mantShift; - uint64_t mask = (1 << mantShift) - 1; if (temp << mantShift != data) inExact = 1; mantissa = (uint32_t)temp; @@ -183,7 +181,6 @@ float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd) uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = data >> mantShift; - uint64_t mask = (1 << mantShift) - 1; if (temp << mantShift != data) inExact = 1; mantissa = (uint32_t)temp; @@ -209,7 +206,6 @@ float qcom_u64_2_f32(uint64_t data, bool sat, roundingMode rnd) uint32_t mantissa; if (mantShift >= 0){ uint64_t temp = (uint64_t)data >> mantShift; - uint64_t mask = (1 << mantShift) - 1; if (temp 
<< mantShift != data) inExact = 1; mantissa = (uint32_t)temp; diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index d489e28a..788af99b 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -1666,8 +1666,6 @@ static cl_program MakeProgram( Type outType, Type inType, SaturationMode sat, &programSource, testName, flags); if (error) { - char buffer[2048] = ""; - vlog_error("Failed to build kernel/program.\n", error); clReleaseProgram(program); return NULL; diff --git a/test_conformance/events/test_callbacks.cpp b/test_conformance/events/test_callbacks.cpp index 6025afb7..47e898b9 100644 --- a/test_conformance/events/test_callbacks.cpp +++ b/test_conformance/events/test_callbacks.cpp @@ -79,7 +79,6 @@ int test_callback_event_single( cl_device_id device, cl_context context, cl_comm /* use struct as call back para */ CALL_BACK_USER_DATA user_data[EVENT_CALLBACK_TYPE_TOTAL]; - int index [EVENT_CALLBACK_TYPE_TOTAL]={ 0,1,2}; for( int i=0;i< EVENT_CALLBACK_TYPE_TOTAL; i++) { user_data[i].enevt_type=event_callback_types[i]; diff --git a/test_conformance/events/test_events.cpp b/test_conformance/events/test_events.cpp index 26693f99..c0efe864 100644 --- a/test_conformance/events/test_events.cpp +++ b/test_conformance/events/test_events.cpp @@ -604,8 +604,6 @@ int test_event_enqueue_marker( cl_device_id deviceID, cl_context context, cl_com #ifdef CL_VERSION_1_2 int test_event_enqueue_marker_with_event_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { - - cl_int status; SETUP_EVENT( context, queue ); cl_event event_list[3]={ NULL, NULL, NULL}; @@ -649,8 +647,6 @@ int test_event_enqueue_marker_with_event_list( cl_device_id deviceID, cl_context int test_event_enqueue_barrier_with_event_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) { - - cl_int status; 
SETUP_EVENT( context, queue ); cl_event event_list[3]={ NULL, NULL, NULL}; diff --git a/test_conformance/gl/common.h b/test_conformance/gl/common.h index 36221da1..aaa6a5e7 100644 --- a/test_conformance/gl/common.h +++ b/test_conformance/gl/common.h @@ -32,7 +32,11 @@ struct format { }; // These are the typically tested formats. - +// TODO: These variables should be made const; until then, suppress unused +// variable warnings as not every translation unit including this header uses +// all variables. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" static struct format common_formats[] = { #ifdef __APPLE__ { GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, kUChar }, @@ -60,6 +64,7 @@ static struct format depth_formats[] = { { GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, kFloat }, }; #endif +#pragma GCC diagnostic pop int test_images_write_common(cl_device_id device, cl_context context, cl_command_queue queue, struct format* formats, size_t nformats, diff --git a/test_conformance/gl/test_image_methods.cpp b/test_conformance/gl/test_image_methods.cpp index 07f5b65e..7d055fb2 100644 --- a/test_conformance/gl/test_image_methods.cpp +++ b/test_conformance/gl/test_image_methods.cpp @@ -337,7 +337,6 @@ int test_image_methods_depth( cl_device_id device, cl_context context, cl_comman return 0; } - size_t pixelSize; int result = 0; GLenum depth_targets[] = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; size_t ntargets = sizeof(depth_targets) / sizeof(depth_targets[0]); @@ -378,7 +377,6 @@ int test_image_methods_multisample( cl_device_id device, cl_context context, cl_ return 0; } - size_t pixelSize; int result = 0; GLenum targets[] = {GL_TEXTURE_2D_MULTISAMPLE, GL_TEXTURE_2D_MULTISAMPLE_ARRAY}; size_t ntargets = sizeof(targets) / sizeof(targets[0]); diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp index 9bbb257b..15bad520 100644 --- 
a/test_conformance/gl/test_images_write_common.cpp +++ b/test_conformance/gl/test_images_write_common.cpp @@ -427,7 +427,6 @@ static int test_image_write( cl_context context, cl_command_queue queue, int supportsHalf(cl_context context, bool* supports_half) { int error; - size_t size; cl_uint numDev; error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL); @@ -446,7 +445,6 @@ int supportsHalf(cl_context context, bool* supports_half) int supportsMsaa(cl_context context, bool* supports_msaa) { int error; - size_t size; cl_uint numDev; error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL); @@ -465,7 +463,6 @@ int supportsMsaa(cl_context context, bool* supports_msaa) int supportsDepth(cl_context context, bool* supports_depth) { int error; - size_t size; cl_uint numDev; error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDev, NULL); @@ -486,7 +483,6 @@ static int test_image_format_write( cl_context context, cl_command_queue queue, GLenum internalFormat, GLenum glType, ExplicitType type, MTdata d ) { int error; - int samples = 8; // If we're testing a half float format, then we need to determine the // rounding mode of this machine. Punt if we fail to do so. 
diff --git a/test_conformance/half/Test_vStoreHalf.cpp b/test_conformance/half/Test_vStoreHalf.cpp index b1491025..591470f0 100644 --- a/test_conformance/half/Test_vStoreHalf.cpp +++ b/test_conformance/half/Test_vStoreHalf.cpp @@ -81,7 +81,7 @@ ReferenceF(cl_uint jid, cl_uint tid, void *userInfo) cl_ushort *r = cri->r + off; f2h f = cri->f; cl_ulong i = cri->i + off; - cl_uint j, rr; + cl_uint j; if (off + count > lim) count = lim - off; diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp index 2a42a70e..42933c0f 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp @@ -81,7 +81,6 @@ int test_read_image_1D(cl_context context, cl_command_queue queue, for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - float lod_float = (float) lod; origin[1] = lod; size_t width_lod, row_pitch_lod; diff --git a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp index 5d5c2883..efd2a795 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp @@ -82,7 +82,6 @@ int test_read_image_1D_array(cl_context context, cl_command_queue queue, for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - float lod_float = (float) lod; size_t width_lod, row_pitch_lod, slice_pitch_lod; if( gTestMipmaps ) origin[2] = lod; diff --git a/test_conformance/images/clReadWriteImage/test_read_2D.cpp b/test_conformance/images/clReadWriteImage/test_read_2D.cpp index fb2e7948..b7f8553b 100644 --- a/test_conformance/images/clReadWriteImage/test_read_2D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp @@ -81,7 +81,6 @@ int test_read_image_2D(cl_context context, 
cl_command_queue queue, for( size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - float lod_float = (float) lod; origin[2] = lod; size_t width_lod, height_lod, row_pitch_lod; diff --git a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp index d0113bb7..5889ad6a 100644 --- a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp @@ -83,9 +83,8 @@ int test_read_image_2D_array(cl_context context, cl_command_queue queue, for(size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - float lod_float = (float) lod; origin[3] = lod; - size_t width_lod, height_lod, depth_lod, row_pitch_lod, slice_pitch_lod; + size_t width_lod, height_lod, row_pitch_lod, slice_pitch_lod; width_lod = (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; height_lod = (imageInfo->height >> lod) ? 
(imageInfo->height >> lod) : 1; diff --git a/test_conformance/images/clReadWriteImage/test_read_3D.cpp b/test_conformance/images/clReadWriteImage/test_read_3D.cpp index 2dcd2433..6f73f423 100644 --- a/test_conformance/images/clReadWriteImage/test_read_3D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp @@ -83,7 +83,6 @@ int test_read_image_3D(cl_context context, cl_command_queue queue, for(size_t lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - float lod_float = (float) lod; origin[3] = lod; size_t width_lod, height_lod, depth_lod, row_pitch_lod, slice_pitch_lod; diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt index 595f024a..54449875 100644 --- a/test_conformance/images/kernel_read_write/CMakeLists.txt +++ b/test_conformance/images/kernel_read_write/CMakeLists.txt @@ -17,5 +17,15 @@ set(${MODULE_NAME}_SOURCES ../common.cpp ) +# Make unused variables not fatal in this module; see +# https://github.com/KhronosGroup/OpenCL-CTS/issues/1484 +if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") + SET_SOURCE_FILES_PROPERTIES( + ${${MODULE_NAME}_SOURCES} + PROPERTIES + COMPILE_FLAGS "-Wno-error=unused-variable" + ) +endif() + include(../../CMakeCommon.txt) diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index a0a3d65d..f91ecb22 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -5357,10 +5357,10 @@ long double reference_acosl(long double x) 0x3243F6A8885A308DULL, 0x313198A2E0370734ULL }; // first 126 bits of pi // http://www.super-computing.org/pi-hexa_current.html - long double head, tail, temp; + long double head, tail; #if __LDBL_MANT_DIG__ >= 64 // long double has 64-bits of precision or greater - temp = (long double)pi_bits[0] * 0x1.0p64L; + 
long double temp = (long double)pi_bits[0] * 0x1.0p64L; head = temp + (long double)pi_bits[1]; temp -= head; // rounding err rounding pi_bits[1] into head tail = (long double)pi_bits[1] + temp; diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp index 169ab80c..7e979251 100644 --- a/test_conformance/pipes/test_pipe_limits.cpp +++ b/test_conformance/pipes/test_pipe_limits.cpp @@ -163,7 +163,7 @@ int test_pipe_max_args(cl_device_id deviceID, cl_context context, cl_command_que cl_int err; cl_int size; int num_pipe_elements = 1024; - int i, j; + int i; int max_pipe_args; std::stringstream source; clEventWrapper producer_sync_event = NULL; @@ -648,4 +648,4 @@ int test_pipe_max_active_reservations(cl_device_id deviceID, cl_context context, } return 0; -} \ No newline at end of file +} diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp index dd0d1216..a502e03e 100644 --- a/test_conformance/pipes/test_pipe_read_write.cpp +++ b/test_conformance/pipes/test_pipe_read_write.cpp @@ -626,7 +626,6 @@ int test_pipe_readwrite_struct_generic( cl_device_id deviceID, cl_context contex size_t size = sizeof(TestStruct); size_t global_work_size[3]; cl_int err; - int total_errors = 0; int i; MTdataHolder d(gRandomSeed); clEventWrapper producer_sync_event = NULL; diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp index 12ff6535..a32ee4ea 100644 --- a/test_conformance/printf/test_printf.cpp +++ b/test_conformance/printf/test_printf.cpp @@ -232,10 +232,8 @@ int waitForEvent(cl_event* event) //----------------------------------------- static cl_program makePrintfProgram(cl_kernel *kernel_ptr, const cl_context context,const unsigned int testId,const unsigned int testNum,bool isLongSupport,bool is64bAddrSpace) { - int err,i; + int err; cl_program program; - cl_device_id devID; - char buildLog[ 1024 * 128 ]; char testname[256] = {0}; char 
addrSpaceArgument[256] = {0}; char addrSpacePAddArgument[256] = {0}; diff --git a/test_conformance/printf/util_printf.cpp b/test_conformance/printf/util_printf.cpp index 3546c5f5..d45e1d43 100644 --- a/test_conformance/printf/util_printf.cpp +++ b/test_conformance/printf/util_printf.cpp @@ -842,8 +842,6 @@ static void hexRefBuilder(printDataGenParameters& params, char* refResult, const */ void generateRef(const cl_device_id device) { - int fd = -1; - char _refBuffer[ANALYSIS_BUFFER_SIZE]; const cl_device_fp_config fpConfig = get_default_rounding_mode(device); const RoundingMode hostRound = get_round(); RoundingMode deviceRound; diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index e659206e..27ee5ffd 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -173,8 +173,6 @@ static cl_program makeSelectProgram(cl_kernel *kernel_ptr, const cl_context cont char extension[128] = ""; int err = 0; - int i; // generic, re-usable loop variable - const char *source[] = { extension, "__kernel void ", testname, diff --git a/test_conformance/spir/run_services.cpp b/test_conformance/spir/run_services.cpp index 3162e16f..6e06d53c 100644 --- a/test_conformance/spir/run_services.cpp +++ b/test_conformance/spir/run_services.cpp @@ -213,7 +213,6 @@ cl_kernel create_kernel_helper( cl_program program, const std::string& kernel_na { int error = CL_SUCCESS; cl_kernel kernel = NULL; - cl_device_id device = get_program_device(program); /* And create a kernel from it */ kernel = clCreateKernel( program, kernel_name.c_str(), &error ); if( kernel == NULL || error != CL_SUCCESS) diff --git a/test_conformance/spirv_new/main.cpp b/test_conformance/spirv_new/main.cpp index 5a8664b6..41566837 100644 --- a/test_conformance/spirv_new/main.cpp +++ b/test_conformance/spirv_new/main.cpp @@ -203,7 +203,6 @@ int get_program_with_il(clProgramWrapper &prog, const cl_device_id deviceID, test_status InitCL(cl_device_id 
id) { test_status spirv_status; - bool force = true; spirv_status = check_spirv_compilation_readiness(id); if (spirv_status != TEST_PASS) { diff --git a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp index 6a4982eb..0728ea03 100644 --- a/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp +++ b/test_conformance/spirv_new/test_cl_khr_spirv_no_integer_wrap_decoration.cpp @@ -109,7 +109,6 @@ int test_ext_cl_khr_spirv_no_integer_wrap_decoration(cl_device_id deviceID, kernelStr = kernelStream.str(); } - size_t kernelLen = kernelStr.size(); const char *kernelBuf = kernelStr.c_str(); for (int i = 0; i < num; i++) { diff --git a/test_conformance/spirv_new/test_op_fmath.cpp b/test_conformance/spirv_new/test_op_fmath.cpp index bec0667c..61e2864d 100644 --- a/test_conformance/spirv_new/test_op_fmath.cpp +++ b/test_conformance/spirv_new/test_op_fmath.cpp @@ -79,11 +79,8 @@ int test_fmath(cl_device_id deviceID, kernelStr = kernelStream.str(); } - size_t kernelLen = kernelStr.size(); const char *kernelBuf = kernelStr.c_str(); - const char *options = fast_math ? 
"-cl-fast-relaxed-math" : NULL; - std::vector h_ref(num); { diff --git a/test_conformance/spirv_new/test_op_function.cpp b/test_conformance/spirv_new/test_op_function.cpp index caa3e0d3..16183e80 100644 --- a/test_conformance/spirv_new/test_op_function.cpp +++ b/test_conformance/spirv_new/test_op_function.cpp @@ -33,7 +33,6 @@ int test_function(cl_device_id deviceID, err = clEnqueueWriteBuffer(queue, in, CL_TRUE, 0, bytes, &h_in[0], 0, NULL, NULL); SPIRV_CHECK_ERROR(err, "Failed to copy to in buffer"); - cl_uint bits = sizeof(void *) * 8; std::string spvStr = std::string("op_function") + "_" + std::string(funcType); const char *spvName = spvStr.c_str(); diff --git a/test_conformance/spirv_new/test_op_negate.cpp b/test_conformance/spirv_new/test_op_negate.cpp index 1891c9bb..e3dc1f34 100644 --- a/test_conformance/spirv_new/test_op_negate.cpp +++ b/test_conformance/spirv_new/test_op_negate.cpp @@ -43,7 +43,6 @@ int test_negation(cl_device_id deviceID, err = clEnqueueWriteBuffer(queue, in, CL_TRUE, 0, bytes, &h_in[0], 0, NULL, NULL); SPIRV_CHECK_ERROR(err, "Failed to copy to in buffer"); - cl_uint bits = sizeof(void *) * 8; std::string spvStr = std::string(funcName) + "_" + std::string(Tname); const char *spvName = spvStr.c_str(); diff --git a/test_conformance/spirv_new/test_op_opaque.cpp b/test_conformance/spirv_new/test_op_opaque.cpp index 067d9e4e..e6216061 100644 --- a/test_conformance/spirv_new/test_op_opaque.cpp +++ b/test_conformance/spirv_new/test_op_opaque.cpp @@ -17,7 +17,6 @@ or Khronos Conformance Test Source License Agreement as executed between Khronos TEST_SPIRV_FUNC(op_type_opaque_simple) { const char *name = "opaque"; - int num = (int)(1 << 10); cl_int err = CL_SUCCESS; std::vector buffer_vec = readSPIRV(name); diff --git a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp index 0a604bcf..0859668c 100644 --- a/test_conformance/spirv_new/test_op_vector_times_scalar.cpp +++ 
b/test_conformance/spirv_new/test_op_vector_times_scalar.cpp @@ -75,7 +75,6 @@ int test_vector_times_scalar(cl_device_id deviceID, kernelStr = kernelStream.str(); } - size_t kernelLen = kernelStr.size(); const char *kernelBuf = kernelStr.c_str(); std::vector h_ref(num); @@ -107,7 +106,6 @@ int test_vector_times_scalar(cl_device_id deviceID, SPIRV_CHECK_ERROR(err, "Failed to read from ref"); } - cl_uint bits = sizeof(void *) * 8; std::string ref = "vector_times_scalar_"; ref += Tname; const char *spvName = ref.c_str(); -- cgit v1.2.3 From 89c8d87963fb5cdafd2632d3892b10626a73ad2d Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 9 Sep 2022 17:58:31 +0100 Subject: [NFC] Fix unused variable warning in Release builds (#1494) The condition inside the assert is dropped in Release builds, so `num_printed` becomes unused. Signed-off-by: Sven van Haastregt --- test_conformance/basic/test_progvar.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index c0ad870a..9c872be5 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -1256,6 +1256,7 @@ static int l_capacity( cl_device_id device, cl_context context, cl_command_queue char prog_src[MAX_STR]; int num_printed = snprintf(prog_src,sizeof(prog_src),prog_src_template,max_size, max_size); assert( num_printed < MAX_STR ); // or increase MAX_STR + (void)num_printed; StringTable ksrc; ksrc.add( prog_src ); -- cgit v1.2.3 From 00f21739e5f474bf55d8912756121aabb1d3045e Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Mon, 12 Sep 2022 11:49:13 +0100 Subject: Minor cleanups for run_conformance.py (#1492) Use the print function from futures for Python 3 compatibility, remove an unreachable statement, remove unused imports, and add a missing sys.exit call when opening the log file fails. 
Signed-off-by: Stuart Brady --- test_conformance/run_conformance.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py index 52c91697..bb8f86ff 100755 --- a/test_conformance/run_conformance.py +++ b/test_conformance/run_conformance.py @@ -8,14 +8,14 @@ #// #******************************************************************/ +from __future__ import print_function + import os import re import sys import subprocess import time -import commands import tempfile -import math import string DEBUG = 0 @@ -144,7 +144,7 @@ def run_test_checking_output(current_directory, test_dir, log_file): if not done and p.returncode != None: if p.returncode < 0: if not output_this_run: - print "" + print("") output_this_run = True write_screen_log(" ==> ERROR: test killed/crashed: " + str(p.returncode) + ".") done = True @@ -161,7 +161,7 @@ def run_test_checking_output(current_directory, test_dir, log_file): match = re.search(".*(FAILED|ERROR).*", line) if match: if not output_this_run: - print "" + print("") output_this_run = True print(" ==> " + line.replace('\n', '')) match = re.search(".*FAILED.*", line) @@ -170,7 +170,7 @@ def run_test_checking_output(current_directory, test_dir, log_file): match = re.search(".*(PASSED).*", line) if match: if not output_this_run: - print "" + print("") output_this_run = True print(" " + line.replace('\n', '')) # Write it to the log @@ -193,7 +193,6 @@ def run_test_checking_output(current_directory, test_dir, log_file): except IOError: write_screen_log("\n ==> ERROR: could not reopen output file from test.") return -1 - done = True else: line = line + char_read pointer = pointer + 1 @@ -227,7 +226,7 @@ def run_tests(tests): log_file.write("========================================================================================\n") log_file.write("========================================================================================\n") 
previous_test = test_dir - print("(" + get_time() + ") BEGIN " + test_name.ljust(40) + ": "), + print("(" + get_time() + ") BEGIN " + test_name.ljust(40) + ": ", end='') log_file.write(" ----------------------------------------------------------------------------------------\n") log_file.write(" (" + get_time() + ") Running Sub Test: " + test_name + "\n") log_file.write(" ----------------------------------------------------------------------------------------\n") @@ -256,9 +255,9 @@ def run_tests(tests): # Move print the finish status if result == 0: - print("(" + get_time() + ") PASSED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")"), + print("(" + get_time() + ") PASSED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")", end='') else: - print("(" + get_time() + ") FAILED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")"), + print("(" + get_time() + ") FAILED " + test_name.ljust(40) + ": (" + str(int(run_time)).rjust(3) + "s, test " + str(test_number).rjust(3) + os.sep + str(len(tests)) + ")", end='') test_number = test_number + 1 log_file.write(" ----------------------------------------------------------------------------------------\n") @@ -295,7 +294,8 @@ for arg in sys.argv: try: log_file = open(log_file_name, "w") except IOError: - print "Could not open log file " + log_file_name + print("Could not open log file " + log_file_name) + sys.exit(-1) # Determine which devices to test device_types = ["CL_DEVICE_TYPE_DEFAULT", "CL_DEVICE_TYPE_CPU", "CL_DEVICE_TYPE_GPU", "CL_DEVICE_TYPE_ACCELERATOR", "CL_DEVICE_TYPE_ALL"] -- cgit v1.2.3 From d928ac059c2fb175974af0b1abdf888f5f7db2cb Mon Sep 17 00:00:00 2001 From: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Date: Mon, 12 Sep 2022 17:12:06 
+0530 Subject: Use correct size for memory allocation in SVM test (#1496) Memory is allocated for cl_int, but mapped as size_t. Use size_t instead of cl_int during allocation and mapping for consistency. --- test_conformance/SVM/test_shared_address_space_fine_grain.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/SVM/test_shared_address_space_fine_grain.cpp b/test_conformance/SVM/test_shared_address_space_fine_grain.cpp index a98a880c..3350972e 100644 --- a/test_conformance/SVM/test_shared_address_space_fine_grain.cpp +++ b/test_conformance/SVM/test_shared_address_space_fine_grain.cpp @@ -47,7 +47,7 @@ int test_svm_shared_address_space_fine_grain(cl_device_id deviceID, cl_context c test_error2(error, pNodes, "malloc failed"); // this allocation holds an index into the nodes buffer, it is used for node allocation - size_t* pAllocator = (size_t*) align_malloc(sizeof(cl_int), 128); + size_t *pAllocator = (size_t *)align_malloc(sizeof(size_t), 128); test_error2(error, pAllocator, "malloc failed"); // this allocation holds the count of correct nodes, which is computed by the verify kernel. 
-- cgit v1.2.3 From 1d74c85ff3ba210e8d14fa81feff237dcb52529a Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 13 Sep 2022 13:42:32 +0100 Subject: [NFC] Reformat code in events test (#1497) Signed-off-by: Stuart Brady --- test_conformance/events/action_classes.cpp | 529 +++++++++------ test_conformance/events/action_classes.h | 430 ++++++------ test_conformance/events/main.cpp | 62 +- test_conformance/events/procs.h | 127 +++- test_conformance/events/testBase.h | 5 +- test_conformance/events/test_callbacks.cpp | 371 ++++++----- .../events/test_event_dependencies.cpp | 542 ++++++++++----- test_conformance/events/test_events.cpp | 730 ++++++++++++--------- test_conformance/events/test_userevents.cpp | 426 +++++++----- .../events/test_userevents_multithreaded.cpp | 38 +- test_conformance/events/test_waitlists.cpp | 267 ++++---- 11 files changed, 2079 insertions(+), 1448 deletions(-) diff --git a/test_conformance/events/action_classes.cpp b/test_conformance/events/action_classes.cpp index d70d76bd..a84be6b6 100644 --- a/test_conformance/events/action_classes.cpp +++ b/test_conformance/events/action_classes.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -19,7 +19,8 @@ const cl_uint BufferSizeReductionFactor = 20; -cl_int Action::IGetPreferredImageSize2D( cl_device_id device, size_t &outWidth, size_t &outHeight ) +cl_int Action::IGetPreferredImageSize2D(cl_device_id device, size_t &outWidth, + size_t &outHeight) { cl_ulong maxAllocSize; size_t maxWidth, maxHeight; @@ -27,23 +28,27 @@ cl_int Action::IGetPreferredImageSize2D( cl_device_id device, size_t &outWidt // Get the largest possible buffer we could allocate - error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); - error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); - error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); - test_error( error, "Unable to get device config" ); + error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, + sizeof(maxWidth), &maxWidth, NULL); + error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, + sizeof(maxHeight), &maxHeight, NULL); + test_error(error, "Unable to get device config"); // Create something of a decent size - if( maxWidth * maxHeight * 4 > maxAllocSize / BufferSizeReductionFactor ) + if (maxWidth * maxHeight * 4 > maxAllocSize / BufferSizeReductionFactor) { - float rootSize = sqrtf( (float)( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) ); + float rootSize = + sqrtf((float)(maxAllocSize / (BufferSizeReductionFactor * 4))); - if( (size_t)rootSize > maxWidth ) + if ((size_t)rootSize > maxWidth) outWidth = maxWidth; else outWidth = (size_t)rootSize; - outHeight = (size_t)( ( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) / outWidth ); - if( outHeight > maxHeight ) - outHeight = maxHeight; + outHeight = (size_t)((maxAllocSize / (BufferSizeReductionFactor * 4)) + / outWidth); + if (outHeight > 
maxHeight) outHeight = maxHeight; } else { @@ -51,19 +56,18 @@ cl_int Action::IGetPreferredImageSize2D( cl_device_id device, size_t &outWidt outHeight = maxHeight; } - outWidth /=2; - outHeight /=2; + outWidth /= 2; + outHeight /= 2; - if (outWidth > 2048) - outWidth = 2048; - if (outHeight > 2048) - outHeight = 2048; + if (outWidth > 2048) outWidth = 2048; + if (outHeight > 2048) outHeight = 2048; log_info("\tImage size: %d x %d (%gMB)\n", (int)outWidth, (int)outHeight, - (double)((int)outWidth*(int)outHeight*4)/(1024.0*1024.0)); + (double)((int)outWidth * (int)outHeight * 4) / (1024.0 * 1024.0)); return CL_SUCCESS; } -cl_int Action::IGetPreferredImageSize3D( cl_device_id device, size_t &outWidth, size_t &outHeight, size_t &outDepth ) +cl_int Action::IGetPreferredImageSize3D(cl_device_id device, size_t &outWidth, + size_t &outHeight, size_t &outDepth) { cl_ulong maxAllocSize; size_t maxWidth, maxHeight, maxDepth; @@ -71,28 +75,34 @@ cl_int Action::IGetPreferredImageSize3D( cl_device_id device, size_t &outWidt // Get the largest possible buffer we could allocate - error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); - error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxWidth ), &maxWidth, NULL ); - error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxHeight ), &maxHeight, NULL ); - error |= clGetDeviceInfo( device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDepth ), &maxDepth, NULL ); - test_error( error, "Unable to get device config" ); + error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); + error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, + sizeof(maxWidth), &maxWidth, NULL); + error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, + sizeof(maxHeight), &maxHeight, NULL); + error |= clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, + sizeof(maxDepth), &maxDepth, NULL); + test_error(error, 
"Unable to get device config"); // Create something of a decent size - if( (cl_ulong)maxWidth * maxHeight * maxDepth > maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) + if ((cl_ulong)maxWidth * maxHeight * maxDepth + > maxAllocSize / (BufferSizeReductionFactor * 4)) { - float rootSize = cbrtf( (float)( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) ); + float rootSize = + cbrtf((float)(maxAllocSize / (BufferSizeReductionFactor * 4))); - if( (size_t)rootSize > maxWidth ) + if ((size_t)rootSize > maxWidth) outWidth = maxWidth; else outWidth = (size_t)rootSize; - if( (size_t)rootSize > maxHeight ) + if ((size_t)rootSize > maxHeight) outHeight = maxHeight; else outHeight = (size_t)rootSize; - outDepth = (size_t)( ( maxAllocSize / ( BufferSizeReductionFactor * 4 ) ) / ( outWidth * outHeight ) ); - if( outDepth > maxDepth ) - outDepth = maxDepth; + outDepth = (size_t)((maxAllocSize / (BufferSizeReductionFactor * 4)) + / (outWidth * outHeight)); + if (outDepth > maxDepth) outDepth = maxDepth; } else { @@ -101,25 +111,25 @@ cl_int Action::IGetPreferredImageSize3D( cl_device_id device, size_t &outWidt outDepth = maxDepth; } - outWidth /=2; - outHeight /=2; - outDepth /=2; + outWidth /= 2; + outHeight /= 2; + outDepth /= 2; - if (outWidth > 512) - outWidth = 512; - if (outHeight > 512) - outHeight = 512; - if (outDepth > 512) - outDepth = 512; - log_info("\tImage size: %d x %d x %d (%gMB)\n", (int)outWidth, (int)outHeight, (int)outDepth, - (double)((int)outWidth*(int)outHeight*(int)outDepth*4)/(1024.0*1024.0)); + if (outWidth > 512) outWidth = 512; + if (outHeight > 512) outHeight = 512; + if (outDepth > 512) outDepth = 512; + log_info("\tImage size: %d x %d x %d (%gMB)\n", (int)outWidth, + (int)outHeight, (int)outDepth, + (double)((int)outWidth * (int)outHeight * (int)outDepth * 4) + / (1024.0 * 1024.0)); return CL_SUCCESS; } #pragma mark -------------------- Execution Sub-Classes ------------------------- -cl_int NDRangeKernelAction::Setup( cl_device_id device, 
cl_context context, cl_command_queue queue ) +cl_int NDRangeKernelAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { const char *long_kernel[] = { "__kernel void sample_test(__global float *src, __global int *dst)\n" @@ -132,101 +142,116 @@ cl_int NDRangeKernelAction::Setup( cl_device_id device, cl_context context, cl_c " dst[tid] = (int)src[tid] * 3;\n" " }\n" "\n" - "}\n" }; + "}\n" + }; size_t threads[1] = { 1000 }; int error; - if( create_single_kernel_helper( context, &mProgram, &mKernel, 1, long_kernel, "sample_test" ) ) + if (create_single_kernel_helper(context, &mProgram, &mKernel, 1, + long_kernel, "sample_test")) { return -1; } - error = get_max_common_work_group_size( context, mKernel, threads[0], &mLocalThreads[0] ); - test_error( error, "Unable to get work group size to use" ); + error = get_max_common_work_group_size(context, mKernel, threads[0], + &mLocalThreads[0]); + test_error(error, "Unable to get work group size to use"); mStreams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1000, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); mStreams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * 1000, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); /* Set the arguments */ - error = clSetKernelArg( mKernel, 0, sizeof( mStreams[0] ), &mStreams[0] ); - test_error( error, "Unable to set kernel arguments" ); - error = clSetKernelArg( mKernel, 1, sizeof( mStreams[1] ), &mStreams[1] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(mKernel, 0, sizeof(mStreams[0]), &mStreams[0]); + test_error(error, "Unable to set kernel arguments"); + error = clSetKernelArg(mKernel, 1, sizeof(mStreams[1]), &mStreams[1]); + test_error(error, "Unable to set kernel arguments"); return CL_SUCCESS; } -cl_int NDRangeKernelAction::Execute( 
cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int NDRangeKernelAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { size_t threads[1] = { 1000 }; - cl_int error = clEnqueueNDRangeKernel( queue, mKernel, 1, NULL, threads, mLocalThreads, numWaits, waits, outEvent ); - test_error( error, "Unable to execute kernel" ); + cl_int error = + clEnqueueNDRangeKernel(queue, mKernel, 1, NULL, threads, mLocalThreads, + numWaits, waits, outEvent); + test_error(error, "Unable to execute kernel"); return CL_SUCCESS; } #pragma mark -------------------- Buffer Sub-Classes ------------------------- -cl_int BufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue, bool allocate ) +cl_int BufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue, bool allocate) { cl_int error; cl_ulong maxAllocSize; // Get the largest possible buffer we could allocate - error = clGetDeviceInfo( device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL ); + error = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(maxAllocSize), &maxAllocSize, NULL); - // Don't create a buffer quite that big, just so we have some space left over for other work - mSize = (size_t)( maxAllocSize / BufferSizeReductionFactor ); + // Don't create a buffer quite that big, just so we have some space left + // over for other work + mSize = (size_t)(maxAllocSize / BufferSizeReductionFactor); // Cap at 128M so tests complete in a reasonable amount of time. 
- if (mSize > 128 << 20) - mSize = 128 << 20; + if (mSize > 128 << 20) mSize = 128 << 20; - mSize /=2; + mSize /= 2; - log_info("\tBuffer size: %gMB\n", (double)mSize/(1024.0*1024.0)); + log_info("\tBuffer size: %gMB\n", (double)mSize / (1024.0 * 1024.0)); - mBuffer = clCreateBuffer( context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, mSize, NULL, &error ); - test_error( error, "Unable to create buffer to test against" ); + mBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, + mSize, NULL, &error); + test_error(error, "Unable to create buffer to test against"); - mOutBuffer = malloc( mSize ); - if( mOutBuffer == NULL ) + mOutBuffer = malloc(mSize); + if (mOutBuffer == NULL) { - log_error( "ERROR: Unable to allocate temp buffer (out of memory)\n" ); + log_error("ERROR: Unable to allocate temp buffer (out of memory)\n"); return CL_OUT_OF_RESOURCES; } return CL_SUCCESS; } -cl_int ReadBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int ReadBufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { - return BufferAction::Setup( device, context, queue, true ); + return BufferAction::Setup(device, context, queue, true); } -cl_int ReadBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int ReadBufferAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - cl_int error = clEnqueueReadBuffer( queue, mBuffer, CL_FALSE, 0, mSize, mOutBuffer, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue buffer read" ); + cl_int error = clEnqueueReadBuffer(queue, mBuffer, CL_FALSE, 0, mSize, + mOutBuffer, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue buffer read"); return CL_SUCCESS; } -cl_int WriteBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int WriteBufferAction::Setup(cl_device_id device, cl_context context, + 
cl_command_queue queue) { - return BufferAction::Setup( device, context, queue, true ); + return BufferAction::Setup(device, context, queue, true); } -cl_int WriteBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int WriteBufferAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - cl_int error = clEnqueueWriteBuffer( queue, mBuffer, CL_FALSE, 0, mSize, mOutBuffer, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue buffer write" ); + cl_int error = clEnqueueWriteBuffer(queue, mBuffer, CL_FALSE, 0, mSize, + mOutBuffer, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue buffer write"); return CL_SUCCESS; } @@ -234,40 +259,46 @@ cl_int WriteBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_ MapBufferAction::~MapBufferAction() { if (mQueue) - clEnqueueUnmapMemObject( mQueue, mBuffer, mMappedPtr, 0, NULL, NULL ); + clEnqueueUnmapMemObject(mQueue, mBuffer, mMappedPtr, 0, NULL, NULL); } -cl_int MapBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int MapBufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { - return BufferAction::Setup( device, context, queue, false ); + return BufferAction::Setup(device, context, queue, false); } -cl_int MapBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int MapBufferAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { cl_int error; mQueue = queue; - mMappedPtr = clEnqueueMapBuffer( queue, mBuffer, CL_FALSE, CL_MAP_READ, 0, mSize, numWaits, waits, outEvent, &error ); - test_error( error, "Unable to enqueue buffer map" ); + mMappedPtr = clEnqueueMapBuffer(queue, mBuffer, CL_FALSE, CL_MAP_READ, 0, + mSize, numWaits, waits, outEvent, &error); + test_error(error, "Unable to enqueue buffer map"); return 
CL_SUCCESS; } -cl_int UnmapBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int UnmapBufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { - cl_int error = BufferAction::Setup( device, context, queue, false ); - if( error != CL_SUCCESS ) - return error; + cl_int error = BufferAction::Setup(device, context, queue, false); + if (error != CL_SUCCESS) return error; - mMappedPtr = clEnqueueMapBuffer( queue, mBuffer, CL_TRUE, CL_MAP_READ, 0, mSize, 0, NULL, NULL, &error ); - test_error( error, "Unable to enqueue buffer map" ); + mMappedPtr = clEnqueueMapBuffer(queue, mBuffer, CL_TRUE, CL_MAP_READ, 0, + mSize, 0, NULL, NULL, &error); + test_error(error, "Unable to enqueue buffer map"); return CL_SUCCESS; } -cl_int UnmapBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int UnmapBufferAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - cl_int error = clEnqueueUnmapMemObject( queue, mBuffer, mMappedPtr, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue buffer unmap" ); + cl_int error = clEnqueueUnmapMemObject(queue, mBuffer, mMappedPtr, numWaits, + waits, outEvent); + test_error(error, "Unable to enqueue buffer unmap"); return CL_SUCCESS; } @@ -275,349 +306,410 @@ cl_int UnmapBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_ #pragma mark -------------------- Read/Write Image Classes ------------------------- -cl_int ReadImage2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int ReadImage2DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mImage = create_image_2d( 
context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); + mImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); - test_error( error, "Unable to create image to test against" ); + test_error(error, "Unable to create image to test against"); - mOutput = malloc( mWidth * mHeight * 4 ); - if( mOutput == NULL ) + mOutput = malloc(mWidth * mHeight * 4); + if (mOutput == NULL) { - log_error( "ERROR: Unable to allocate buffer: out of memory\n" ); + log_error("ERROR: Unable to allocate buffer: out of memory\n"); return CL_OUT_OF_RESOURCES; } return CL_SUCCESS; } -cl_int ReadImage2DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int ReadImage2DAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 }; - cl_int error = clEnqueueReadImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image read" ); + cl_int error = clEnqueueReadImage(queue, mImage, CL_FALSE, origin, region, + 0, 0, mOutput, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image read"); return CL_SUCCESS; } -cl_int ReadImage3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int ReadImage3DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mImage = 
create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mOutput = malloc( mWidth * mHeight * mDepth * 4 ); - if( mOutput == NULL ) + mOutput = malloc(mWidth * mHeight * mDepth * 4); + if (mOutput == NULL) { - log_error( "ERROR: Unable to allocate buffer: out of memory\n" ); + log_error("ERROR: Unable to allocate buffer: out of memory\n"); return CL_OUT_OF_RESOURCES; } return CL_SUCCESS; } -cl_int ReadImage3DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int ReadImage3DAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth }; - cl_int error = clEnqueueReadImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image read" ); + cl_int error = clEnqueueReadImage(queue, mImage, CL_FALSE, origin, region, + 0, 0, mOutput, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image read"); return CL_SUCCESS; } -cl_int WriteImage2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int WriteImage2DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image 
to test against"); - mOutput = malloc( mWidth * mHeight * 4 ); - if( mOutput == NULL ) + mOutput = malloc(mWidth * mHeight * 4); + if (mOutput == NULL) { - log_error( "ERROR: Unable to allocate buffer: out of memory\n" ); + log_error("ERROR: Unable to allocate buffer: out of memory\n"); return CL_OUT_OF_RESOURCES; } return CL_SUCCESS; } -cl_int WriteImage2DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int WriteImage2DAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 }; - cl_int error = clEnqueueWriteImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image write" ); + cl_int error = + clEnqueueWriteImage(queue, mImage, CL_FALSE, origin, region, 0, 0, + mOutput, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image write"); return CL_SUCCESS; } -cl_int WriteImage3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int WriteImage3DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mOutput = malloc( mWidth * mHeight * mDepth * 4 ); - if( mOutput == NULL ) + mOutput = malloc(mWidth * 
mHeight * mDepth * 4); + if (mOutput == NULL) { - log_error( "ERROR: Unable to allocate buffer: out of memory\n" ); + log_error("ERROR: Unable to allocate buffer: out of memory\n"); return CL_OUT_OF_RESOURCES; } return CL_SUCCESS; } -cl_int WriteImage3DAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int WriteImage3DAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth }; - cl_int error = clEnqueueWriteImage( queue, mImage, CL_FALSE, origin, region, 0, 0, mOutput, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image write" ); + cl_int error = + clEnqueueWriteImage(queue, mImage, CL_FALSE, origin, region, 0, 0, + mOutput, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image write"); return CL_SUCCESS; } #pragma mark -------------------- Copy Image Classes ------------------------- -cl_int CopyImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int CopyImageAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth }; - cl_int error = clEnqueueCopyImage( queue, mSrcImage, mDstImage, origin, origin, region, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image copy" ); + cl_int error = + clEnqueueCopyImage(queue, mSrcImage, mDstImage, origin, origin, region, + numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image copy"); return CL_SUCCESS; } -cl_int CopyImage2Dto2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyImage2Dto2DAction::Setup(cl_device_id device, 
cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; mWidth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mSrcImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); mDepth = 1; return CL_SUCCESS; } -cl_int CopyImage2Dto3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyImage2Dto3DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; mDepth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mSrcImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_3d(context, 
CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); mDepth = 1; return CL_SUCCESS; } -cl_int CopyImage3Dto2DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyImage3Dto2DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; mDepth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mSrcImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); mDepth = 1; return CL_SUCCESS; } -cl_int CopyImage3Dto3DAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyImage3Dto3DAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; mDepth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + 
mSrcImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); return CL_SUCCESS; } #pragma mark -------------------- Copy Image/Buffer Classes ------------------------- -cl_int Copy2DImageToBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int Copy2DImageToBufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; mWidth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_2d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mSrcImage = create_image_2d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstBuffer = clCreateBuffer( context, CL_MEM_WRITE_ONLY, mWidth * mHeight * 4, NULL, &error ); - test_error( error, "Unable to create buffer to test against" ); + mDstBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + mWidth * mHeight * 4, NULL, &error); + test_error(error, "Unable to create buffer to test against"); return CL_SUCCESS; } -cl_int Copy2DImageToBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int Copy2DImageToBufferAction::Execute(cl_command_queue queue, + cl_uint numWaits, cl_event *waits, + 
cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 }; - cl_int error = clEnqueueCopyImageToBuffer( queue, mSrcImage, mDstBuffer, origin, region, 0, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image to buffer copy" ); + cl_int error = + clEnqueueCopyImageToBuffer(queue, mSrcImage, mDstBuffer, origin, region, + 0, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image to buffer copy"); return CL_SUCCESS; } -cl_int Copy3DImageToBufferAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int Copy3DImageToBufferAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; mDepth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mSrcImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); - mDstBuffer = clCreateBuffer( context, CL_MEM_WRITE_ONLY, mWidth * mHeight * mDepth * 4, NULL, &error ); - test_error( error, "Unable to create buffer to test against" ); + mDstBuffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, + mWidth * mHeight * mDepth * 4, NULL, &error); + test_error(error, "Unable to create buffer to test against"); return CL_SUCCESS; } -cl_int Copy3DImageToBufferAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int Copy3DImageToBufferAction::Execute(cl_command_queue queue, + cl_uint numWaits, cl_event *waits, + cl_event *outEvent) 
{ - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth }; - cl_int error = clEnqueueCopyImageToBuffer( queue, mSrcImage, mDstBuffer, origin, region, 0, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue image to buffer copy" ); + cl_int error = + clEnqueueCopyImageToBuffer(queue, mSrcImage, mDstBuffer, origin, region, + 0, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue image to buffer copy"); return CL_SUCCESS; } -cl_int CopyBufferTo2DImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyBufferTo2DImageAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; mWidth /= 2; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mSrcBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY, mWidth * mHeight * 4, NULL, &error ); - test_error( error, "Unable to create buffer to test against" ); + mSrcBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, mWidth * mHeight * 4, + NULL, &error); + test_error(error, "Unable to create buffer to test against"); - mDstImage = create_image_2d( context, CL_MEM_WRITE_ONLY, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_2d(context, CL_MEM_WRITE_ONLY, &format, mWidth, + mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); return CL_SUCCESS; } -cl_int CopyBufferTo2DImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int CopyBufferTo2DImageAction::Execute(cl_command_queue queue, + cl_uint numWaits, cl_event *waits, + cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, 
mHeight, 1 }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 }; - cl_int error = clEnqueueCopyBufferToImage( queue, mSrcBuffer, mDstImage, 0, origin, region, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue buffer to image copy" ); + cl_int error = + clEnqueueCopyBufferToImage(queue, mSrcBuffer, mDstImage, 0, origin, + region, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue buffer to image copy"); return CL_SUCCESS; } -cl_int CopyBufferTo3DImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int CopyBufferTo3DImageAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize3D( device, mWidth, mHeight, mDepth ) ) ) + if ((error = IGetPreferredImageSize3D(device, mWidth, mHeight, mDepth))) return error; mDepth /= 2; - mSrcBuffer = clCreateBuffer( context, CL_MEM_READ_ONLY, mWidth * mHeight * mDepth * 4, NULL, &error ); - test_error( error, "Unable to create buffer to test against" ); + mSrcBuffer = clCreateBuffer(context, CL_MEM_READ_ONLY, + mWidth * mHeight * mDepth * 4, NULL, &error); + test_error(error, "Unable to create buffer to test against"); cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mDstImage = create_image_3d( context, CL_MEM_READ_ONLY, &format, mWidth, mHeight, mDepth, 0, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mDstImage = create_image_3d(context, CL_MEM_READ_ONLY, &format, mWidth, + mHeight, mDepth, 0, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); return CL_SUCCESS; } -cl_int CopyBufferTo3DImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int CopyBufferTo3DImageAction::Execute(cl_command_queue queue, + cl_uint numWaits, cl_event *waits, + cl_event *outEvent) { - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, mDepth }; + 
size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, mDepth }; - cl_int error = clEnqueueCopyBufferToImage( queue, mSrcBuffer, mDstImage, 0, origin, region, numWaits, waits, outEvent ); - test_error( error, "Unable to enqueue buffer to image copy" ); + cl_int error = + clEnqueueCopyBufferToImage(queue, mSrcBuffer, mDstImage, 0, origin, + region, numWaits, waits, outEvent); + test_error(error, "Unable to enqueue buffer to image copy"); return CL_SUCCESS; } @@ -627,34 +719,39 @@ cl_int CopyBufferTo3DImageAction::Execute( cl_command_queue queue, cl_uint numWa MapImageAction::~MapImageAction() { if (mQueue) - clEnqueueUnmapMemObject( mQueue, mImage, mMappedPtr, 0, NULL, NULL ); + clEnqueueUnmapMemObject(mQueue, mImage, mMappedPtr, 0, NULL, NULL); } -cl_int MapImageAction::Setup( cl_device_id device, cl_context context, cl_command_queue queue ) +cl_int MapImageAction::Setup(cl_device_id device, cl_context context, + cl_command_queue queue) { cl_int error; - if( ( error = IGetPreferredImageSize2D( device, mWidth, mHeight ) ) ) + if ((error = IGetPreferredImageSize2D(device, mWidth, mHeight))) return error; cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 }; - mImage = create_image_2d( context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, &format, mWidth, mHeight, 0, NULL, &error ); - test_error( error, "Unable to create image to test against" ); + mImage = create_image_2d(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, + &format, mWidth, mHeight, 0, NULL, &error); + test_error(error, "Unable to create image to test against"); return CL_SUCCESS; } -cl_int MapImageAction::Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) +cl_int MapImageAction::Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) { cl_int error; - size_t origin[ 3 ] = { 0, 0, 0 }, region[ 3 ] = { mWidth, mHeight, 1 }; + size_t origin[3] = { 0, 0, 0 }, region[3] = { mWidth, mHeight, 1 }; size_t outPitch; mQueue = queue; - 
mMappedPtr = clEnqueueMapImage( queue, mImage, CL_FALSE, CL_MAP_READ, origin, region, &outPitch, NULL, numWaits, waits, outEvent, &error ); - test_error( error, "Unable to enqueue image map" ); + mMappedPtr = + clEnqueueMapImage(queue, mImage, CL_FALSE, CL_MAP_READ, origin, region, + &outPitch, NULL, numWaits, waits, outEvent, &error); + test_error(error, "Unable to enqueue image map"); return CL_SUCCESS; } diff --git a/test_conformance/events/action_classes.h b/test_conformance/events/action_classes.h index 069ed346..e528f11a 100644 --- a/test_conformance/events/action_classes.h +++ b/test_conformance/events/action_classes.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -23,303 +23,319 @@ // it would potentially be possible for an implementation to make actions // wait on one another based on their shared I/O, not because of their // wait lists! 
-class Action -{ - public: - Action() {} - virtual ~Action() {} - - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ) = 0; - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ) = 0; - - virtual const char * GetName( void ) const = 0; - - protected: - - cl_int IGetPreferredImageSize2D( cl_device_id device, size_t &outWidth, size_t &outHeight ); - cl_int IGetPreferredImageSize3D( cl_device_id device, size_t &outWidth, size_t &outHeight, size_t &outDepth ); +class Action { +public: + Action() {} + virtual ~Action() {} + + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue) = 0; + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent) = 0; + + virtual const char *GetName(void) const = 0; + +protected: + cl_int IGetPreferredImageSize2D(cl_device_id device, size_t &outWidth, + size_t &outHeight); + cl_int IGetPreferredImageSize3D(cl_device_id device, size_t &outWidth, + size_t &outHeight, size_t &outDepth); }; // Simple NDRangeKernel execution that takes a noticable amount of time -class NDRangeKernelAction : public Action -{ - public: - NDRangeKernelAction() {} - virtual ~NDRangeKernelAction() {} - - size_t mLocalThreads[ 1 ]; - clMemWrapper mStreams[ 2 ]; - clProgramWrapper mProgram; - clKernelWrapper mKernel; - - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); - - virtual const char * GetName( void ) const { return "NDRangeKernel"; } +class NDRangeKernelAction : public Action { +public: + NDRangeKernelAction() {} + virtual ~NDRangeKernelAction() {} + + size_t mLocalThreads[1]; + clMemWrapper mStreams[2]; + clProgramWrapper mProgram; + clKernelWrapper mKernel; + + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); 
+ virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); + + virtual const char *GetName(void) const { return "NDRangeKernel"; } }; // Base action for buffer actions -class BufferAction : public Action -{ - public: - clMemWrapper mBuffer; - size_t mSize; - void *mOutBuffer; +class BufferAction : public Action { +public: + clMemWrapper mBuffer; + size_t mSize; + void *mOutBuffer; - BufferAction() { mOutBuffer = NULL; } - virtual ~BufferAction() { free( mOutBuffer ); } + BufferAction() { mOutBuffer = NULL; } + virtual ~BufferAction() { free(mOutBuffer); } - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue, bool allocate ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue, bool allocate); }; -class ReadBufferAction : public BufferAction -{ - public: - ReadBufferAction() {} - virtual ~ReadBufferAction() {} +class ReadBufferAction : public BufferAction { +public: + ReadBufferAction() {} + virtual ~ReadBufferAction() {} - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "ReadBuffer"; } + virtual const char *GetName(void) const { return "ReadBuffer"; } }; -class WriteBufferAction : public BufferAction -{ - public: - WriteBufferAction() {} - virtual ~WriteBufferAction() {} +class WriteBufferAction : public BufferAction { +public: + WriteBufferAction() {} + virtual ~WriteBufferAction() {} - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event 
*waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "WriteBuffer"; } + virtual const char *GetName(void) const { return "WriteBuffer"; } }; -class MapBufferAction : public BufferAction -{ - public: - MapBufferAction() : mQueue(0) {} +class MapBufferAction : public BufferAction { +public: + MapBufferAction(): mQueue(0) {} - cl_command_queue mQueue; - void *mMappedPtr; + cl_command_queue mQueue; + void *mMappedPtr; - virtual ~MapBufferAction(); - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual ~MapBufferAction(); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "MapBuffer"; } + virtual const char *GetName(void) const { return "MapBuffer"; } }; -class UnmapBufferAction : public BufferAction -{ - public: - UnmapBufferAction() {} - virtual ~UnmapBufferAction() {} +class UnmapBufferAction : public BufferAction { +public: + UnmapBufferAction() {} + virtual ~UnmapBufferAction() {} - void *mMappedPtr; + void *mMappedPtr; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "UnmapBuffer"; } + virtual const 
char *GetName(void) const { return "UnmapBuffer"; } }; -class ReadImage2DAction : public Action -{ - public: - ReadImage2DAction() { mOutput = NULL; } - virtual ~ReadImage2DAction() { free( mOutput ); } +class ReadImage2DAction : public Action { +public: + ReadImage2DAction() { mOutput = NULL; } + virtual ~ReadImage2DAction() { free(mOutput); } - clMemWrapper mImage; - size_t mWidth, mHeight; - void *mOutput; + clMemWrapper mImage; + size_t mWidth, mHeight; + void *mOutput; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "ReadImage2D"; } + virtual const char *GetName(void) const { return "ReadImage2D"; } }; -class ReadImage3DAction : public Action -{ - public: - ReadImage3DAction() { mOutput = NULL; } - virtual ~ReadImage3DAction() { free( mOutput ); } +class ReadImage3DAction : public Action { +public: + ReadImage3DAction() { mOutput = NULL; } + virtual ~ReadImage3DAction() { free(mOutput); } - clMemWrapper mImage; - size_t mWidth, mHeight, mDepth; - void *mOutput; + clMemWrapper mImage; + size_t mWidth, mHeight, mDepth; + void *mOutput; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "ReadImage3D"; } + virtual const char *GetName(void) const { return "ReadImage3D"; } }; 
-class WriteImage2DAction : public Action -{ - public: - clMemWrapper mImage; - size_t mWidth, mHeight; - void *mOutput; +class WriteImage2DAction : public Action { +public: + clMemWrapper mImage; + size_t mWidth, mHeight; + void *mOutput; - WriteImage2DAction() { mOutput = NULL; } - virtual ~WriteImage2DAction() { free( mOutput ); } + WriteImage2DAction() { mOutput = NULL; } + virtual ~WriteImage2DAction() { free(mOutput); } - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "WriteImage2D"; } + virtual const char *GetName(void) const { return "WriteImage2D"; } }; -class WriteImage3DAction : public Action -{ - public: - clMemWrapper mImage; - size_t mWidth, mHeight, mDepth; - void *mOutput; +class WriteImage3DAction : public Action { +public: + clMemWrapper mImage; + size_t mWidth, mHeight, mDepth; + void *mOutput; - WriteImage3DAction() { mOutput = NULL; } - virtual ~WriteImage3DAction() { free( mOutput ); } + WriteImage3DAction() { mOutput = NULL; } + virtual ~WriteImage3DAction() { free(mOutput); } - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "WriteImage3D"; } + virtual const char *GetName(void) const { return "WriteImage3D"; } }; -class CopyImageAction : public Action -{ - 
public: - CopyImageAction() {} - virtual ~CopyImageAction() {} +class CopyImageAction : public Action { +public: + CopyImageAction() {} + virtual ~CopyImageAction() {} - clMemWrapper mSrcImage, mDstImage; - size_t mWidth, mHeight, mDepth; + clMemWrapper mSrcImage, mDstImage; + size_t mWidth, mHeight, mDepth; - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); }; -class CopyImage2Dto2DAction : public CopyImageAction -{ - public: - CopyImage2Dto2DAction() {} - virtual ~CopyImage2Dto2DAction() {} +class CopyImage2Dto2DAction : public CopyImageAction { +public: + CopyImage2Dto2DAction() {} + virtual ~CopyImage2Dto2DAction() {} - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); - virtual const char * GetName( void ) const { return "CopyImage2Dto2D"; } + virtual const char *GetName(void) const { return "CopyImage2Dto2D"; } }; -class CopyImage2Dto3DAction : public CopyImageAction -{ - public: - CopyImage2Dto3DAction() {} - virtual ~CopyImage2Dto3DAction() {} +class CopyImage2Dto3DAction : public CopyImageAction { +public: + CopyImage2Dto3DAction() {} + virtual ~CopyImage2Dto3DAction() {} - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); - virtual const char * GetName( void ) const { return "CopyImage2Dto3D"; } + virtual const char *GetName(void) const { return "CopyImage2Dto3D"; } }; -class CopyImage3Dto2DAction : public CopyImageAction -{ - public: - CopyImage3Dto2DAction() {} - virtual ~CopyImage3Dto2DAction() {} +class CopyImage3Dto2DAction : public CopyImageAction { +public: + CopyImage3Dto2DAction() {} + virtual ~CopyImage3Dto2DAction() {} - virtual 
cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); - virtual const char * GetName( void ) const { return "CopyImage3Dto2D"; } + virtual const char *GetName(void) const { return "CopyImage3Dto2D"; } }; -class CopyImage3Dto3DAction : public CopyImageAction -{ - public: - CopyImage3Dto3DAction() {} - virtual ~CopyImage3Dto3DAction() {} +class CopyImage3Dto3DAction : public CopyImageAction { +public: + CopyImage3Dto3DAction() {} + virtual ~CopyImage3Dto3DAction() {} - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); - virtual const char * GetName( void ) const { return "CopyImage3Dto3D"; } + virtual const char *GetName(void) const { return "CopyImage3Dto3D"; } }; -class Copy2DImageToBufferAction : public Action -{ - public: - Copy2DImageToBufferAction() {} - virtual ~Copy2DImageToBufferAction() {} +class Copy2DImageToBufferAction : public Action { +public: + Copy2DImageToBufferAction() {} + virtual ~Copy2DImageToBufferAction() {} - clMemWrapper mSrcImage, mDstBuffer; - size_t mWidth, mHeight; + clMemWrapper mSrcImage, mDstBuffer; + size_t mWidth, mHeight; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "Copy2DImageToBuffer"; } + virtual const char *GetName(void) const { return "Copy2DImageToBuffer"; } }; -class Copy3DImageToBufferAction : public Action -{ - public: - Copy3DImageToBufferAction() {} - virtual 
~Copy3DImageToBufferAction() {} +class Copy3DImageToBufferAction : public Action { +public: + Copy3DImageToBufferAction() {} + virtual ~Copy3DImageToBufferAction() {} - clMemWrapper mSrcImage, mDstBuffer; - size_t mWidth, mHeight, mDepth; + clMemWrapper mSrcImage, mDstBuffer; + size_t mWidth, mHeight, mDepth; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "Copy3DImageToBuffer"; } + virtual const char *GetName(void) const { return "Copy3DImageToBuffer"; } }; -class CopyBufferTo2DImageAction : public Action -{ - public: - CopyBufferTo2DImageAction() {} - virtual ~CopyBufferTo2DImageAction() {} +class CopyBufferTo2DImageAction : public Action { +public: + CopyBufferTo2DImageAction() {} + virtual ~CopyBufferTo2DImageAction() {} - clMemWrapper mSrcBuffer, mDstImage; - size_t mWidth, mHeight; + clMemWrapper mSrcBuffer, mDstImage; + size_t mWidth, mHeight; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "CopyBufferTo2D"; } + virtual const char *GetName(void) const { return "CopyBufferTo2D"; } }; -class CopyBufferTo3DImageAction : public Action -{ - public: - CopyBufferTo3DImageAction() {} - virtual ~CopyBufferTo3DImageAction() {} +class CopyBufferTo3DImageAction : public Action { 
+public: + CopyBufferTo3DImageAction() {} + virtual ~CopyBufferTo3DImageAction() {} - clMemWrapper mSrcBuffer, mDstImage; - size_t mWidth, mHeight, mDepth; + clMemWrapper mSrcBuffer, mDstImage; + size_t mWidth, mHeight, mDepth; - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "CopyBufferTo3D"; } + virtual const char *GetName(void) const { return "CopyBufferTo3D"; } }; -class MapImageAction : public Action -{ - public: - MapImageAction() : mQueue(0) {} +class MapImageAction : public Action { +public: + MapImageAction(): mQueue(0) {} - clMemWrapper mImage; - size_t mWidth, mHeight; - void *mMappedPtr; - cl_command_queue mQueue; + clMemWrapper mImage; + size_t mWidth, mHeight; + void *mMappedPtr; + cl_command_queue mQueue; - virtual ~MapImageAction(); - virtual cl_int Setup( cl_device_id device, cl_context context, cl_command_queue queue ); - virtual cl_int Execute( cl_command_queue queue, cl_uint numWaits, cl_event *waits, cl_event *outEvent ); + virtual ~MapImageAction(); + virtual cl_int Setup(cl_device_id device, cl_context context, + cl_command_queue queue); + virtual cl_int Execute(cl_command_queue queue, cl_uint numWaits, + cl_event *waits, cl_event *outEvent); - virtual const char * GetName( void ) const { return "MapImage"; } + virtual const char *GetName(void) const { return "MapImage"; } }; diff --git a/test_conformance/events/main.cpp b/test_conformance/events/main.cpp index 777d2d36..74682f99 100644 --- a/test_conformance/events/main.cpp +++ b/test_conformance/events/main.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. 
-// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -24,44 +24,44 @@ #endif test_definition test_list[] = { - ADD_TEST( event_get_execute_status ), - ADD_TEST( event_get_write_array_status ), - ADD_TEST( event_get_read_array_status ), - ADD_TEST( event_get_info ), - ADD_TEST( event_wait_for_execute ), - ADD_TEST( event_wait_for_array ), - ADD_TEST( event_flush ), - ADD_TEST( event_finish_execute ), - ADD_TEST( event_finish_array ), - ADD_TEST( event_release_before_done ), - ADD_TEST( event_enqueue_marker ), + ADD_TEST(event_get_execute_status), + ADD_TEST(event_get_write_array_status), + ADD_TEST(event_get_read_array_status), + ADD_TEST(event_get_info), + ADD_TEST(event_wait_for_execute), + ADD_TEST(event_wait_for_array), + ADD_TEST(event_flush), + ADD_TEST(event_finish_execute), + ADD_TEST(event_finish_array), + ADD_TEST(event_release_before_done), + ADD_TEST(event_enqueue_marker), #ifdef CL_VERSION_1_2 - ADD_TEST( event_enqueue_marker_with_event_list ), - ADD_TEST( event_enqueue_barrier_with_event_list ), + ADD_TEST(event_enqueue_marker_with_event_list), + ADD_TEST(event_enqueue_barrier_with_event_list), #endif - ADD_TEST( out_of_order_event_waitlist_single_queue ), - ADD_TEST( out_of_order_event_waitlist_multi_queue ), - ADD_TEST( out_of_order_event_waitlist_multi_queue_multi_device ), - ADD_TEST( out_of_order_event_enqueue_wait_for_events_single_queue ), - ADD_TEST( out_of_order_event_enqueue_wait_for_events_multi_queue ), - ADD_TEST( out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device ), - ADD_TEST( out_of_order_event_enqueue_marker_single_queue ), - ADD_TEST( out_of_order_event_enqueue_marker_multi_queue ), - ADD_TEST( out_of_order_event_enqueue_marker_multi_queue_multi_device ), - ADD_TEST( out_of_order_event_enqueue_barrier_single_queue ), + ADD_TEST(out_of_order_event_waitlist_single_queue), + 
ADD_TEST(out_of_order_event_waitlist_multi_queue), + ADD_TEST(out_of_order_event_waitlist_multi_queue_multi_device), + ADD_TEST(out_of_order_event_enqueue_wait_for_events_single_queue), + ADD_TEST(out_of_order_event_enqueue_wait_for_events_multi_queue), + ADD_TEST( + out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device), + ADD_TEST(out_of_order_event_enqueue_marker_single_queue), + ADD_TEST(out_of_order_event_enqueue_marker_multi_queue), + ADD_TEST(out_of_order_event_enqueue_marker_multi_queue_multi_device), + ADD_TEST(out_of_order_event_enqueue_barrier_single_queue), - ADD_TEST( waitlists ), - ADD_TEST( userevents ), - ADD_TEST( callbacks ), - ADD_TEST( callbacks_simultaneous ), - ADD_TEST( userevents_multithreaded ), + ADD_TEST(waitlists), + ADD_TEST(userevents), + ADD_TEST(callbacks), + ADD_TEST(callbacks_simultaneous), + ADD_TEST(userevents_multithreaded), }; -const int test_num = ARRAY_SIZE( test_list ); +const int test_num = ARRAY_SIZE(test_list); int main(int argc, const char *argv[]) { return runTestHarness(argc, argv, test_num, test_list, false, 0); } - diff --git a/test_conformance/events/procs.h b/test_conformance/events/procs.h index f077c247..97309db3 100644 --- a/test_conformance/events/procs.h +++ b/test_conformance/events/procs.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -18,44 +18,101 @@ #include "harness/typeWrappers.h" #include "harness/clImageHelper.h" -extern float random_float(float low, float high); -extern float calculate_ulperror(float a, float b); - - -extern int test_event_get_execute_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_get_write_array_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_get_read_array_status(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_get_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_wait_for_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_wait_for_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_flush(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_finish_execute(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_finish_array(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_release_before_done(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_enqueue_marker(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -#ifdef CL_VERSION_1_2 -extern int test_event_enqueue_marker_with_event_list(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_event_enqueue_barrier_with_event_list(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -#endif +extern float random_float(float low, float high); +extern float 
calculate_ulperror(float a, float b); -extern int test_out_of_order_event_waitlist_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_waitlist_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_waitlist_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_enqueue_wait_for_events_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_enqueue_wait_for_events_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_event_get_execute_status(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_get_write_array_status(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_get_read_array_status(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_get_info(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_event_wait_for_execute(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_wait_for_array(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_event_flush(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_event_finish_execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int 
num_elements); +extern int test_event_finish_array(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_event_release_before_done(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_enqueue_marker(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +#ifdef CL_VERSION_1_2 +extern int test_event_enqueue_marker_with_event_list(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_event_enqueue_barrier_with_event_list(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +#endif -extern int test_out_of_order_event_enqueue_barrier_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_out_of_order_event_waitlist_single_queue(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_out_of_order_event_waitlist_multi_queue(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); +extern int test_out_of_order_event_waitlist_multi_queue_multi_device( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); -extern int test_out_of_order_event_enqueue_marker_single_queue(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_enqueue_marker_multi_queue( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_out_of_order_event_enqueue_marker_multi_queue_multi_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); +extern int test_out_of_order_event_enqueue_wait_for_events_single_queue( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int 
test_out_of_order_event_enqueue_wait_for_events_multi_queue( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int +test_out_of_order_event_enqueue_wait_for_events_multi_queue_multi_device( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); -extern int test_waitlists( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); -extern int test_userevents( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); -extern int test_callbacks( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); -extern int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); -extern int test_userevents_multithreaded( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ); +extern int test_out_of_order_event_enqueue_barrier_single_queue( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_out_of_order_event_enqueue_marker_single_queue( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_out_of_order_event_enqueue_marker_multi_queue( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_out_of_order_event_enqueue_marker_multi_queue_multi_device( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements); +extern int test_waitlists(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_userevents(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_callbacks(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_callbacks_simultaneous(cl_device_id deviceID, + cl_context context, + 
cl_command_queue queue, + int num_elements); +extern int test_userevents_multithreaded(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements); diff --git a/test_conformance/events/testBase.h b/test_conformance/events/testBase.h index 5b49bfd7..63086d7e 100644 --- a/test_conformance/events/testBase.h +++ b/test_conformance/events/testBase.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -26,6 +26,3 @@ #include "procs.h" #endif // _testBase_h - - - diff --git a/test_conformance/events/test_callbacks.cpp b/test_conformance/events/test_callbacks.cpp index 47e898b9..911298a5 100644 --- a/test_conformance/events/test_callbacks.cpp +++ b/test_conformance/events/test_callbacks.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,28 +18,34 @@ #include "harness/conversions.h" #include "harness/ThreadPool.h" -#if !defined (_MSC_VER) +#if !defined(_MSC_VER) #include #endif // !_MSC_VER -extern const char *IGetStatusString( cl_int status ); +extern const char *IGetStatusString(cl_int status); #define PRINT_OPS 0 -// Yes, this is somewhat nasty, in that we're relying on the CPU (the real CPU, not the OpenCL device) -// to be atomic w.r.t. boolean values. Although if it isn't, we'll just miss the check on this bool -// until the next time around, so it's not that big of a deal. Ideally, we'd be using a semaphore with -// a trywait on it, but then that introduces the fun issue of what to do on Win32, etc. This way is -// far more portable, and worst case of failure is a slightly longer test run. 
+// Yes, this is somewhat nasty, in that we're relying on the CPU (the real CPU, +// not the OpenCL device) to be atomic w.r.t. boolean values. Although if it +// isn't, we'll just miss the check on this bool until the next time around, so +// it's not that big of a deal. Ideally, we'd be using a semaphore with a +// trywait on it, but then that introduces the fun issue of what to do on Win32, +// etc. This way is far more portable, and worst case of failure is a slightly +// longer test run. static bool sCallbackTriggered = false; #define EVENT_CALLBACK_TYPE_TOTAL 3 -static bool sCallbackTriggered_flag[ EVENT_CALLBACK_TYPE_TOTAL ] ={ false,false, false }; -cl_int event_callback_types[EVENT_CALLBACK_TYPE_TOTAL] ={ CL_SUBMITTED, CL_RUNNING, CL_COMPLETE}; +static bool sCallbackTriggered_flag[EVENT_CALLBACK_TYPE_TOTAL] = { false, false, + false }; +cl_int event_callback_types[EVENT_CALLBACK_TYPE_TOTAL] = { CL_SUBMITTED, + CL_RUNNING, + CL_COMPLETE }; // Our callback function -/*void CL_CALLBACK single_event_callback_function( cl_event event, cl_int commandStatus, void * userData ) +/*void CL_CALLBACK single_event_callback_function( cl_event event, cl_int +commandStatus, void * userData ) { int i=*static_cast(userData); log_info( "\tEvent callback %d triggered\n", i); @@ -47,67 +53,79 @@ cl_int event_callback_types[EVENT_CALLBACK_TYPE_TOTAL] ={ CL_SUBMITTED, CL_RUNNI }*/ /* use struct as call back para */ -typedef struct { cl_int enevt_type; int index; } CALL_BACK_USER_DATA; +typedef struct +{ + cl_int enevt_type; + int index; +} CALL_BACK_USER_DATA; -void CL_CALLBACK single_event_callback_function_flags( cl_event event, cl_int commandStatus, void * userData ) +void CL_CALLBACK single_event_callback_function_flags(cl_event event, + cl_int commandStatus, + void *userData) { - // int i=*static_cast(userData); - CALL_BACK_USER_DATA *pdata= static_cast(userData); + // int i=*static_cast(userData); + CALL_BACK_USER_DATA *pdata = static_cast(userData); - log_info( "\tEvent 
callback %d of type %d triggered\n", pdata->index, pdata->enevt_type); - sCallbackTriggered_flag [pdata->index ] = true; + log_info("\tEvent callback %d of type %d triggered\n", pdata->index, + pdata->enevt_type); + sCallbackTriggered_flag[pdata->index] = true; } -int test_callback_event_single( cl_device_id device, cl_context context, cl_command_queue queue, Action *actionToTest ) +int test_callback_event_single(cl_device_id device, cl_context context, + cl_command_queue queue, Action *actionToTest) { - // Note: we don't use the waiting feature here. We just want to verify that we get a callback called - // when the given event finishes + // Note: we don't use the waiting feature here. We just want to verify that + // we get a callback called when the given event finishes - cl_int error = actionToTest->Setup( device, context, queue ); - test_error( error, "Unable to set up test action" ); + cl_int error = actionToTest->Setup(device, context, queue); + test_error(error, "Unable to set up test action"); // Set up a user event, which we use as a gate for the second event - clEventWrapper gateEvent = clCreateUserEvent( context, &error ); - test_error( error, "Unable to set up user gate event" ); + clEventWrapper gateEvent = clCreateUserEvent(context, &error); + test_error(error, "Unable to set up user gate event"); // Set up the execution of the action with its actual event clEventWrapper actualEvent; - error = actionToTest->Execute( queue, 1, &gateEvent, &actualEvent ); - test_error( error, "Unable to set up action execution" ); + error = actionToTest->Execute(queue, 1, &gateEvent, &actualEvent); + test_error(error, "Unable to set up action execution"); // Set up the callback on the actual event - /* use struct as call back para */ - CALL_BACK_USER_DATA user_data[EVENT_CALLBACK_TYPE_TOTAL]; - for( int i=0;i< EVENT_CALLBACK_TYPE_TOTAL; i++) - { - user_data[i].enevt_type=event_callback_types[i]; - user_data[i].index =i; - error = clSetEventCallback( actualEvent, 
event_callback_types[i], single_event_callback_function_flags, user_data+i ); - - } + /* use struct as call back para */ + CALL_BACK_USER_DATA user_data[EVENT_CALLBACK_TYPE_TOTAL]; + for (int i = 0; i < EVENT_CALLBACK_TYPE_TOTAL; i++) + { + user_data[i].enevt_type = event_callback_types[i]; + user_data[i].index = i; + error = clSetEventCallback(actualEvent, event_callback_types[i], + single_event_callback_function_flags, + user_data + i); + } // Now release the user event, which will allow our actual action to run - error = clSetUserEventStatus( gateEvent, CL_COMPLETE ); - test_error( error, "Unable to trigger gate event" ); + error = clSetUserEventStatus(gateEvent, CL_COMPLETE); + test_error(error, "Unable to trigger gate event"); - // Now we wait for completion. Note that we can actually wait on the event itself, at least at first - error = clWaitForEvents( 1, &actualEvent ); - test_error( error, "Unable to wait for actual test event" ); + // Now we wait for completion. Note that we can actually wait on the event + // itself, at least at first + error = clWaitForEvents(1, &actualEvent); + test_error(error, "Unable to wait for actual test event"); - // Note: we can check our callback now, and it MIGHT have been triggered, but that's not guaranteed - if( sCallbackTriggered ) + // Note: we can check our callback now, and it MIGHT have been triggered, + // but that's not guaranteed + if (sCallbackTriggered) { // We're all good, so return success return 0; } - // The callback has not yet been called, but that doesn't mean it won't be. So wait for it - log_info( "\tWaiting for callback..." ); - fflush( stdout ); - for( int i = 0; i < 10 * 10; i++ ) + // The callback has not yet been called, but that doesn't mean it won't be. 
+ // So wait for it + log_info("\tWaiting for callback..."); + fflush(stdout); + for (int i = 0; i < 10 * 10; i++) { - usleep( 100000 ); // 1/10th second + usleep(100000); // 1/10th second int cc = 0; for (int k = 0; k < EVENT_CALLBACK_TYPE_TOTAL; k++) @@ -116,206 +134,222 @@ int test_callback_event_single( cl_device_id device, cl_context context, cl_comm cc++; } - if (cc== EVENT_CALLBACK_TYPE_TOTAL ) + if (cc == EVENT_CALLBACK_TYPE_TOTAL) { - log_info( "\n" ); + log_info("\n"); return 0; } - log_info( "." ); - fflush( stdout ); + log_info("."); + fflush(stdout); } // If we got here, we never got the callback - log_error( "\nCallback not called within 10 seconds! (assuming failure)\n" ); + log_error("\nCallback not called within 10 seconds! (assuming failure)\n"); return -1; } -#define TEST_ACTION( name ) \ -{ \ - name##Action action; \ - log_info( "-- Testing " #name "...\n" ); \ - if( ( error = test_callback_event_single( deviceID, context, queue, &action ) ) != CL_SUCCESS ) \ - retVal++; \ - clFinish( queue ); \ -} +#define TEST_ACTION(name) \ + { \ + name##Action action; \ + log_info("-- Testing " #name "...\n"); \ + if ((error = test_callback_event_single(deviceID, context, queue, \ + &action)) \ + != CL_SUCCESS) \ + retVal++; \ + clFinish(queue); \ + } -int test_callbacks( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +int test_callbacks(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int error; int retVal = 0; - log_info( "\n" ); + log_info("\n"); - TEST_ACTION( NDRangeKernel ) + TEST_ACTION(NDRangeKernel) - TEST_ACTION( ReadBuffer ) - TEST_ACTION( WriteBuffer ) - TEST_ACTION( MapBuffer ) - TEST_ACTION( UnmapBuffer ) + TEST_ACTION(ReadBuffer) + TEST_ACTION(WriteBuffer) + TEST_ACTION(MapBuffer) + TEST_ACTION(UnmapBuffer) - if( checkForImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) + if (checkForImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { - log_info( 
"\nNote: device does not support images. Skipping remainder of callback tests...\n" ); + log_info("\nNote: device does not support images. Skipping remainder " + "of callback tests...\n"); } else { - TEST_ACTION( ReadImage2D ) - TEST_ACTION( WriteImage2D ) - TEST_ACTION( CopyImage2Dto2D ) - TEST_ACTION( Copy2DImageToBuffer ) - TEST_ACTION( CopyBufferTo2DImage ) - TEST_ACTION( MapImage ) - - if( checkFor3DImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) - log_info( "\nNote: device does not support 3D images. Skipping remainder of waitlist tests...\n" ); + TEST_ACTION(ReadImage2D) + TEST_ACTION(WriteImage2D) + TEST_ACTION(CopyImage2Dto2D) + TEST_ACTION(Copy2DImageToBuffer) + TEST_ACTION(CopyBufferTo2DImage) + TEST_ACTION(MapImage) + + if (checkFor3DImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + log_info("\nNote: device does not support 3D images. Skipping " + "remainder of waitlist tests...\n"); else { - TEST_ACTION( ReadImage3D ) - TEST_ACTION( WriteImage3D ) - TEST_ACTION( CopyImage2Dto3D ) - TEST_ACTION( CopyImage3Dto2D ) - TEST_ACTION( CopyImage3Dto3D ) - TEST_ACTION( Copy3DImageToBuffer ) - TEST_ACTION( CopyBufferTo3DImage ) + TEST_ACTION(ReadImage3D) + TEST_ACTION(WriteImage3D) + TEST_ACTION(CopyImage2Dto3D) + TEST_ACTION(CopyImage3Dto2D) + TEST_ACTION(CopyImage3Dto3D) + TEST_ACTION(Copy3DImageToBuffer) + TEST_ACTION(CopyBufferTo3DImage) } } return retVal; } -#define SIMUTANEOUS_ACTION_TOTAL 18 -static bool sSimultaneousFlags[ 54 ];// for 18 actions with 3 callback status +#define SIMUTANEOUS_ACTION_TOTAL 18 +static bool sSimultaneousFlags[54]; // for 18 actions with 3 callback status static volatile int sSimultaneousCount; -Action * actions[ 19 ] = { 0 }; +Action *actions[19] = { 0 }; // Callback for the simultaneous tests -void CL_CALLBACK simultaneous_event_callback_function( cl_event event, cl_int commandStatus, void * userData ) +void CL_CALLBACK simultaneous_event_callback_function(cl_event event, + cl_int commandStatus, + void 
*userData) { int eventIndex = (int)(size_t)userData; - int actionIndex = eventIndex/EVENT_CALLBACK_TYPE_TOTAL; - int statusIndex = eventIndex%EVENT_CALLBACK_TYPE_TOTAL; - log_info( "\tEvent callback triggered for action %s callback type %s \n", actions[actionIndex]->GetName(), IGetStatusString(statusIndex) ); - sSimultaneousFlags[ actionIndex ] = true; - ThreadPool_AtomicAdd(&sSimultaneousCount,1); + int actionIndex = eventIndex / EVENT_CALLBACK_TYPE_TOTAL; + int statusIndex = eventIndex % EVENT_CALLBACK_TYPE_TOTAL; + log_info("\tEvent callback triggered for action %s callback type %s \n", + actions[actionIndex]->GetName(), IGetStatusString(statusIndex)); + sSimultaneousFlags[actionIndex] = true; + ThreadPool_AtomicAdd(&sSimultaneousCount, 1); } -int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +int test_callbacks_simultaneous(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int error; - // Unlike the singles test, in this one, we run a bunch of events all at once, to verify that - // the callbacks do get called once-and-only-once for each event, even if the run out of order or - // are dependent on each other + // Unlike the singles test, in this one, we run a bunch of events all at + // once, to verify that the callbacks do get called once-and-only-once for + // each event, even if the run out of order or are dependent on each other // First, the list of actions to run int actionCount = 0, index = 0; - actions[ index++ ] = new NDRangeKernelAction(); - actions[ index++ ] = new ReadBufferAction(); - actions[ index++ ] = new WriteBufferAction(); - actions[ index++ ] = new MapBufferAction(); - actions[ index++ ] = new UnmapBufferAction(); + actions[index++] = new NDRangeKernelAction(); + actions[index++] = new ReadBufferAction(); + actions[index++] = new WriteBufferAction(); + actions[index++] = new MapBufferAction(); + actions[index++] = new 
UnmapBufferAction(); - if( checkForImageSupport( deviceID ) != CL_IMAGE_FORMAT_NOT_SUPPORTED ) + if (checkForImageSupport(deviceID) != CL_IMAGE_FORMAT_NOT_SUPPORTED) { - actions[ index++ ] = new ReadImage2DAction(); - actions[ index++ ] = new WriteImage2DAction(); - actions[ index++ ] = new CopyImage2Dto2DAction(); - actions[ index++ ] = new Copy2DImageToBufferAction(); - actions[ index++ ] = new CopyBufferTo2DImageAction(); - actions[ index++ ] = new MapImageAction(); - - if( checkFor3DImageSupport( deviceID ) != CL_IMAGE_FORMAT_NOT_SUPPORTED ) + actions[index++] = new ReadImage2DAction(); + actions[index++] = new WriteImage2DAction(); + actions[index++] = new CopyImage2Dto2DAction(); + actions[index++] = new Copy2DImageToBufferAction(); + actions[index++] = new CopyBufferTo2DImageAction(); + actions[index++] = new MapImageAction(); + + if (checkFor3DImageSupport(deviceID) != CL_IMAGE_FORMAT_NOT_SUPPORTED) { - actions[ index++ ] = new ReadImage3DAction(); - actions[ index++ ] = new WriteImage3DAction(); - actions[ index++ ] = new CopyImage2Dto3DAction(); - actions[ index++ ] = new CopyImage3Dto2DAction(); - actions[ index++ ] = new CopyImage3Dto3DAction(); - actions[ index++ ] = new Copy3DImageToBufferAction(); - actions[ index++ ] = new CopyBufferTo3DImageAction(); + actions[index++] = new ReadImage3DAction(); + actions[index++] = new WriteImage3DAction(); + actions[index++] = new CopyImage2Dto3DAction(); + actions[index++] = new CopyImage3Dto2DAction(); + actions[index++] = new CopyImage3Dto3DAction(); + actions[index++] = new Copy3DImageToBufferAction(); + actions[index++] = new CopyBufferTo3DImageAction(); } } actionCount = index; - actions[ index++ ] = NULL; + actions[index++] = NULL; // Now set them all up - log_info( "\tSetting up test events...\n" ); - for( index = 0; actions[ index ] != NULL; index++ ) + log_info("\tSetting up test events...\n"); + for (index = 0; actions[index] != NULL; index++) { - error = actions[ index ]->Setup( deviceID, context, 
queue ); - test_error( error, "Unable to set up test action" ); - sSimultaneousFlags[ index ] = false; + error = actions[index]->Setup(deviceID, context, queue); + test_error(error, "Unable to set up test action"); + sSimultaneousFlags[index] = false; } sSimultaneousCount = 0; // Set up the user event to start them all - clEventWrapper gateEvent = clCreateUserEvent( context, &error ); - test_error( error, "Unable to set up user gate event" ); + clEventWrapper gateEvent = clCreateUserEvent(context, &error); + test_error(error, "Unable to set up user gate event"); // Start executing, all tied to the gate event - //clEventWrapper actionEvents[ 18 ];// current actionCount is 18 - clEventWrapper *actionEvents= new clEventWrapper[actionCount]; + // clEventWrapper actionEvents[ 18 ];// current actionCount is 18 + clEventWrapper *actionEvents = new clEventWrapper[actionCount]; if (actionEvents == NULL) { log_error(" memory error in test_callbacks_simultaneous \n"); for (size_t i = 0; i < (sizeof(actions) / sizeof(actions[0])); ++i) if (actions[i]) delete actions[i]; - return -1; + return -1; } - RandomSeed seed( gRandomSeed ); - for( index = 0; actions[ index ] != NULL; index++ ) + RandomSeed seed(gRandomSeed); + for (index = 0; actions[index] != NULL; index++) { // Randomly choose to wait on the gate, or wait on the previous event - cl_event * eventPtr = &gateEvent; - if( ( index > 0 ) && ( random_in_range( 0, 255, seed ) & 1 ) ) - eventPtr = &actionEvents[ index - 1 ]; + cl_event *eventPtr = &gateEvent; + if ((index > 0) && (random_in_range(0, 255, seed) & 1)) + eventPtr = &actionEvents[index - 1]; - error = actions[ index ]->Execute( queue, 1, eventPtr, &actionEvents[ index ] ); - test_error( error, "Unable to execute test action" ); + error = + actions[index]->Execute(queue, 1, eventPtr, &actionEvents[index]); + test_error(error, "Unable to execute test action"); - for( int k=0; k< EVENT_CALLBACK_TYPE_TOTAL; k++) - { - error = clSetEventCallback( actionEvents[index], 
event_callback_types[k], simultaneous_event_callback_function, (void *)(size_t)(index*EVENT_CALLBACK_TYPE_TOTAL+k ) ); - test_error( error, "Unable to set event callback function" ); - - } + for (int k = 0; k < EVENT_CALLBACK_TYPE_TOTAL; k++) + { + error = clSetEventCallback( + actionEvents[index], event_callback_types[k], + simultaneous_event_callback_function, + (void *)(size_t)(index * EVENT_CALLBACK_TYPE_TOTAL + k)); + test_error(error, "Unable to set event callback function"); + } } - int total_callbacks= actionCount * EVENT_CALLBACK_TYPE_TOTAL; + int total_callbacks = actionCount * EVENT_CALLBACK_TYPE_TOTAL; // Now release the user event, which will allow our actual action to run - error = clSetUserEventStatus( gateEvent, CL_COMPLETE ); - test_error( error, "Unable to trigger gate event" ); + error = clSetUserEventStatus(gateEvent, CL_COMPLETE); + test_error(error, "Unable to trigger gate event"); // Wait on the actual action events now - log_info( "\tWaiting for test completions...\n" ); - error = clWaitForEvents( actionCount, &actionEvents[ 0 ] ); - test_error( error, "Unable to wait for actual test events" ); - - // Note: we can check our callback now, and it MIGHT have been triggered, but that's not guaranteed - int last_count = 0; - if( ((last_count = sSimultaneousCount)) == total_callbacks) + log_info("\tWaiting for test completions...\n"); + error = clWaitForEvents(actionCount, &actionEvents[0]); + test_error(error, "Unable to wait for actual test events"); + + // Note: we can check our callback now, and it MIGHT have been triggered, + // but that's not guaranteed + int last_count = 0; + if (((last_count = sSimultaneousCount)) == total_callbacks) { // We're all good, so return success - log_info( "\t%d of %d callbacks received\n", sSimultaneousCount, total_callbacks ); + log_info("\t%d of %d callbacks received\n", sSimultaneousCount, + total_callbacks); - if (actionEvents) delete [] actionEvents; - for (size_t 
i=0;i<(sizeof(actions)/sizeof(actions[0]));++i) - if (actions[i]) delete actions[i]; + if (actionEvents) delete[] actionEvents; + for (size_t i = 0; i < (sizeof(actions) / sizeof(actions[0])); ++i) + if (actions[i]) delete actions[i]; return 0; } // We haven't gotten (all) of the callbacks, so wait for them - log_info( "\tWe've only received %d of the %d callbacks we expected; waiting for more...\n", last_count, total_callbacks ); + log_info("\tWe've only received %d of the %d callbacks we expected; " + "waiting for more...\n", + last_count, total_callbacks); - for( int i = 0; i < 10 * 10; i++ ) + for (int i = 0; i < 10 * 10; i++) { - usleep( 100000 ); // 1/10th second - if( ((last_count = sSimultaneousCount)) == total_callbacks ) + usleep(100000); // 1/10th second + if (((last_count = sSimultaneousCount)) == total_callbacks) { // All of the callbacks were executed if (actionEvents) delete[] actionEvents; @@ -326,16 +360,15 @@ int test_callbacks_simultaneous( cl_device_id deviceID, cl_context context, cl_c } // If we got here, some of the callbacks did not occur in time - log_error( "\nError: We only ever received %d of our %d callbacks!\n", last_count, total_callbacks ); - log_error( "Events that did not receive callbacks:\n" ); - for( index = 0; actions[ index ] != NULL; index++ ) + log_error("\nError: We only ever received %d of our %d callbacks!\n", + last_count, total_callbacks); + log_error("Events that did not receive callbacks:\n"); + for (index = 0; actions[index] != NULL; index++) { - if( !sSimultaneousFlags[ index ] ) - log_error( "\t%s\n", actions[ index ]->GetName() ); + if (!sSimultaneousFlags[index]) + log_error("\t%s\n", actions[index]->GetName()); } - if (actionEvents) delete [] actionEvents; + if (actionEvents) delete[] actionEvents; return -1; - } - diff --git a/test_conformance/events/test_event_dependencies.cpp b/test_conformance/events/test_event_dependencies.cpp index 41136548..45b260a6 100644 --- 
a/test_conformance/events/test_event_dependencies.cpp +++ b/test_conformance/events/test_event_dependencies.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -39,61 +39,79 @@ const char *write_kernels[] = { /* Tests event dependencies by running two kernels that use the same buffer. If two_queues is set they are run in separate queues. - If test_enqueue_wait_for_events is set then clEnqueueWaitForEvent is called between them. - If test_barrier is set then clEnqueueBarrier is called between them (only for single queue). - If neither are set, nothing is done to prevent them from executing in the wrong order. This can be used for verification. + If test_enqueue_wait_for_events is set then clEnqueueWaitForEvent is called + between them. If test_barrier is set then clEnqueueBarrier is called between + them (only for single queue). If neither are set, nothing is done to prevent + them from executing in the wrong order. This can be used for verification. 
*/ -int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int two_queues, int two_devices, - int test_enqueue_wait_for_events, int test_barrier, int use_waitlist, int use_marker) +int test_event_enqueue_wait_for_events_run_test( + cl_device_id deviceID, cl_context context, cl_command_queue queue, + int num_elements, int two_queues, int two_devices, + int test_enqueue_wait_for_events, int test_barrier, int use_waitlist, + int use_marker) { cl_int error = CL_SUCCESS; - size_t threads[3] = {TEST_SIZE,0,0}; + size_t threads[3] = { TEST_SIZE, 0, 0 }; int i, loop_count, event_count, expected_value, failed; int expected_if_only_queue[2]; int max_count = TEST_SIZE; cl_platform_id platform; - cl_command_queue queues[2]; // Not a wrapper so we don't autorelease if they are the same - clCommandQueueWrapper queueWrappers[2]; // If they are different, we use the wrapper so it will auto release + cl_command_queue + queues[2]; // Not a wrapper so we don't autorelease if they are the same + clCommandQueueWrapper queueWrappers[2]; // If they are different, we use the + // wrapper so it will auto release clContextWrapper context_to_use; clMemWrapper data; clProgramWrapper program; clKernelWrapper kernel1[TEST_COUNT], kernel2[TEST_COUNT]; - clEventWrapper event[TEST_COUNT*4+2]; // If we usemarkers we get 2 more events per iteration + clEventWrapper event[TEST_COUNT * 4 + 2]; // If we usemarkers we get 2 more + // events per iteration if (test_enqueue_wait_for_events) - log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier function.\n"); + log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier " + "function.\n"); if (test_barrier) - log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier function.\n"); + log_info("\tTesting with clEnqueueBarrierWithWaitList as barrier " + "function.\n"); if (use_waitlist) - log_info("\tTesting with waitlist-based depenednecies between 
kernels.\n"); + log_info( + "\tTesting with waitlist-based dependencies between kernels.\n"); if (use_marker) log_info("\tTesting with clEnqueueMarker as a barrier function.\n"); - if (test_barrier && (two_queues || two_devices)) { - log_error("\tTest requested with clEnqueueBarrier across two queues. This is not a valid combination.\n"); + if (test_barrier && (two_queues || two_devices)) + { + log_error("\tTest requested with clEnqueueBarrier across two queues. " + "This is not a valid combination.\n"); return -1; } error = clGetPlatformIDs(1, &platform, NULL); test_error(error, "clGetPlatformIDs failed."); - // If we are to use two devices, then get them and create a context with both. + // If we are to use two devices, then get them and create a context with + // both. cl_device_id *two_device_ids; - if (two_devices) { - two_device_ids = (cl_device_id*)malloc(sizeof(cl_device_id)*2); + if (two_devices) + { + two_device_ids = (cl_device_id *)malloc(sizeof(cl_device_id) * 2); cl_uint number_returned; - error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, two_device_ids, &number_returned); - test_error( error, "clGetDeviceIDs for CL_DEVICE_TYPE_ALL failed."); - if (number_returned != 2) { + error = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, two_device_ids, + &number_returned); + test_error(error, "clGetDeviceIDs for CL_DEVICE_TYPE_ALL failed."); + if (number_returned != 2) + { log_info("Failed to obtain two devices. 
Test can not run.\n"); free(two_device_ids); return 0; } - for (i=0; i<2; i++) { + for (i = 0; i < 2; i++) + { cl_device_type type; - error = clGetDeviceInfo(two_device_ids[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL); - test_error( error, "clGetDeviceInfo failed."); + error = clGetDeviceInfo(two_device_ids[i], CL_DEVICE_TYPE, + sizeof(cl_device_type), &type, NULL); + test_error(error, "clGetDeviceInfo failed."); if (type & CL_DEVICE_TYPE_CPU) log_info("\tDevice %d is CL_DEVICE_TYPE_CPU.\n", i); if (type & CL_DEVICE_TYPE_GPU) @@ -104,12 +122,16 @@ int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_conte log_info("\tDevice %d is CL_DEVICE_TYPE_DEFAULT.\n", i); } - context_to_use = clCreateContext(NULL, 2, two_device_ids, notify_callback, NULL, &error); + context_to_use = clCreateContext(NULL, 2, two_device_ids, + notify_callback, NULL, &error); test_error(error, "clCreateContext failed for two devices."); log_info("\tTesting with two devices.\n"); - } else { - context_to_use = clCreateContext(NULL, 1, &deviceID, NULL, NULL, &error); + } + else + { + context_to_use = + clCreateContext(NULL, 1, &deviceID, NULL, NULL, &error); test_error(error, "clCreateContext failed for one device."); log_info("\tTesting with one device.\n"); @@ -117,41 +139,55 @@ int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_conte // If we are using two queues then create them cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; - if (two_queues) { + if (two_queues) + { // Get a second queue if (two_devices) { - if( !checkDeviceForQueueSupport( two_device_ids[ 0 ], props ) || - !checkDeviceForQueueSupport( two_device_ids[ 1 ], props ) ) + if (!checkDeviceForQueueSupport(two_device_ids[0], props) + || !checkDeviceForQueueSupport(two_device_ids[1], props)) { - log_info( "WARNING: One or more device for multi-device test does not support out-of-order exec mode; skipping test.\n" ); + log_info( + "WARNING: One or 
more device for multi-device test does " + "not support out-of-order exec mode; skipping test.\n"); return -1942; } - queueWrappers[0] = clCreateCommandQueue(context_to_use, two_device_ids[0], props, &error); - test_error(error, "clCreateCommandQueue for first queue on first device failed."); - queueWrappers[1] = clCreateCommandQueue(context_to_use, two_device_ids[1], props, &error); - test_error(error, "clCreateCommandQueue for second queue on second device failed."); - + queueWrappers[0] = clCreateCommandQueue( + context_to_use, two_device_ids[0], props, &error); + test_error( + error, + "clCreateCommandQueue for first queue on first device failed."); + queueWrappers[1] = clCreateCommandQueue( + context_to_use, two_device_ids[1], props, &error); + test_error(error, + "clCreateCommandQueue for second queue on second device " + "failed."); } else { - // Single device has already been checked for out-of-order exec support - queueWrappers[0] = clCreateCommandQueue(context_to_use, deviceID, props, &error); + // Single device has already been checked for out-of-order exec + // support + queueWrappers[0] = + clCreateCommandQueue(context_to_use, deviceID, props, &error); test_error(error, "clCreateCommandQueue for first queue failed."); - queueWrappers[1] = clCreateCommandQueue(context_to_use, deviceID, props, &error); + queueWrappers[1] = + clCreateCommandQueue(context_to_use, deviceID, props, &error); test_error(error, "clCreateCommandQueue for second queue failed."); } - // Ugly hack to make sure we only have the wrapper auto-release if they are different queues + // Ugly hack to make sure we only have the wrapper auto-release if they + // are different queues queues[0] = queueWrappers[0]; queues[1] = queueWrappers[1]; log_info("\tTesting with two queues.\n"); } else { - // (Note: single device has already been checked for out-of-order exec support) - // Otherwise create one queue and have the second one be the same - queueWrappers[0] = 
clCreateCommandQueue(context_to_use, deviceID, props, &error); + // (Note: single device has already been checked for out-of-order exec + // support) Otherwise create one queue and have the second one be the + // same + queueWrappers[0] = + clCreateCommandQueue(context_to_use, deviceID, props, &error); test_error(error, "clCreateCommandQueue for first queue failed."); queues[0] = queueWrappers[0]; queues[1] = (cl_command_queue)queues[0]; @@ -160,236 +196,346 @@ int test_event_enqueue_wait_for_events_run_test( cl_device_id deviceID, cl_conte // Setup - create a buffer and the two kernels - data = clCreateBuffer(context_to_use, CL_MEM_READ_WRITE, TEST_SIZE*sizeof(cl_int), NULL, &error); - test_error( error, "clCreateBuffer failed"); + data = clCreateBuffer(context_to_use, CL_MEM_READ_WRITE, + TEST_SIZE * sizeof(cl_int), NULL, &error); + test_error(error, "clCreateBuffer failed"); // Initialize the values to zero - cl_int *values = (cl_int*)malloc(TEST_SIZE*sizeof(cl_int)); - for (i=0; i<(int)TEST_SIZE; i++) - values[i] = 0; - error = clEnqueueWriteBuffer(queues[0], data, CL_TRUE, 0, TEST_SIZE*sizeof(cl_int), values, 0, NULL, NULL); - test_error( error, "clEnqueueWriteBuffer failed"); + cl_int *values = (cl_int *)malloc(TEST_SIZE * sizeof(cl_int)); + for (i = 0; i < (int)TEST_SIZE; i++) values[i] = 0; + error = + clEnqueueWriteBuffer(queues[0], data, CL_TRUE, 0, + TEST_SIZE * sizeof(cl_int), values, 0, NULL, NULL); + test_error(error, "clEnqueueWriteBuffer failed"); expected_value = 0; // Build the kernels - if (create_single_kernel_helper( context_to_use, &program, &kernel1[0], 1, write_kernels, "write_up" )) + if (create_single_kernel_helper(context_to_use, &program, &kernel1[0], 1, + write_kernels, "write_up")) return -1; error = clSetKernelArg(kernel1[0], 0, sizeof(data), &data); error |= clSetKernelArg(kernel1[0], 1, sizeof(max_count), &max_count); - test_error( error, "clSetKernelArg 1 failed"); + test_error(error, "clSetKernelArg 1 failed"); - for (i=1; i", 
(int)status ); + sprintf(tempString, "", (int)status); return tempString; } } /* Note: tests clGetEventStatus and clReleaseEvent (implicitly) */ -int test_event_get_execute_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_get_execute_status(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int status; - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); /* Now wait for it to be done */ - error = clWaitForEvents( 1, &event ); - test_error( error, "Unable to wait for event" ); - - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus to wait for event completion failed" ); - if( status != CL_COMPLETE ) + error = clWaitForEvents(1, &event); + test_error(error, "Unable to wait for event"); + + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, + "Calling clGetEventStatus to wait for event completion failed"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after event complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } @@ -113,57 +128,75 @@ int test_event_get_execute_status( cl_device_id deviceID, cl_context context, cl return 0; } -int test_event_get_info( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_get_info(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); /* Verify parameters of clGetEventInfo not already tested by other tests */ cl_command_queue otherQueue; size_t size; - error = clGetEventInfo( event, CL_EVENT_COMMAND_QUEUE, 
sizeof( otherQueue ), &otherQueue, &size ); - test_error( error, "Unable to get event info!" ); - // We can not check if this is the right queue because this is an opaque object. - if( size != sizeof( queue ) ) + error = clGetEventInfo(event, CL_EVENT_COMMAND_QUEUE, sizeof(otherQueue), + &otherQueue, &size); + test_error(error, "Unable to get event info!"); + // We can not check if this is the right queue because this is an opaque + // object. + if (size != sizeof(queue)) { - log_error( "ERROR: Returned command queue size does not validate (expected %d, got %d)\n", (int)sizeof( queue ), (int)size ); + log_error("ERROR: Returned command queue size does not validate " + "(expected %d, got %d)\n", + (int)sizeof(queue), (int)size); return -1; } cl_command_type type; - error = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof( type ), &type, &size ); - test_error( error, "Unable to get event info!" ); - if( type != CL_COMMAND_NDRANGE_KERNEL ) + error = clGetEventInfo(event, CL_EVENT_COMMAND_TYPE, sizeof(type), &type, + &size); + test_error(error, "Unable to get event info!"); + if (type != CL_COMMAND_NDRANGE_KERNEL) { - log_error( "ERROR: Returned command type does not validate (expected %d, got %d)\n", (int)CL_COMMAND_NDRANGE_KERNEL, (int)type ); + log_error("ERROR: Returned command type does not validate (expected " + "%d, got %d)\n", + (int)CL_COMMAND_NDRANGE_KERNEL, (int)type); return -1; } - if( size != sizeof( type ) ) + if (size != sizeof(type)) { - log_error( "ERROR: Returned command type size does not validate (expected %d, got %d)\n", (int)sizeof( type ), (int)size ); + log_error("ERROR: Returned command type size does not validate " + "(expected %d, got %d)\n", + (int)sizeof(type), (int)size); return -1; } cl_uint count; - error = clGetEventInfo( event, CL_EVENT_REFERENCE_COUNT, sizeof( count ), &count, &size ); - test_error( error, "Unable to get event info for CL_EVENT_REFERENCE_COUNT!" 
); - if( size != sizeof( count ) ) + error = clGetEventInfo(event, CL_EVENT_REFERENCE_COUNT, sizeof(count), + &count, &size); + test_error(error, "Unable to get event info for CL_EVENT_REFERENCE_COUNT!"); + if (size != sizeof(count)) { - log_error( "ERROR: Returned command type size does not validate (expected %d, got %d)\n", (int)sizeof( type ), (int)size ); + log_error("ERROR: Returned command type size does not validate " + "(expected %d, got %d)\n", + (int)sizeof(type), (int)size); return -1; } cl_context testCtx; - error = clGetEventInfo( event, CL_EVENT_CONTEXT, sizeof( testCtx ), &testCtx, &size ); - test_error( error, "Unable to get event context info!" ); - if( size != sizeof( context ) ) + error = clGetEventInfo(event, CL_EVENT_CONTEXT, sizeof(testCtx), &testCtx, + &size); + test_error(error, "Unable to get event context info!"); + if (size != sizeof(context)) { - log_error( "ERROR: Returned context size does not validate (expected %d, got %d)\n", (int)sizeof( context ), (int)size ); + log_error("ERROR: Returned context size does not validate (expected " + "%d, got %d)\n", + (int)sizeof(context), (int)size); return -1; } - if( testCtx != context ) + if (testCtx != context) { - log_error( "ERROR: Returned context does not match (expected %p, got %p)\n", (void *)context, (void *)testCtx ); + log_error( + "ERROR: Returned context does not match (expected %p, got %p)\n", + (void *)context, (void *)testCtx); return -1; } @@ -171,10 +204,11 @@ int test_event_get_info( cl_device_id deviceID, cl_context context, cl_command_q return 0; } -int test_event_get_write_array_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_get_write_array_status(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_mem stream; - cl_float testArray[ 1024 * 32 ]; + cl_float testArray[1024 * 32]; cl_event event; int error; cl_int status; @@ -182,34 +216,41 @@ int 
test_event_get_write_array_status( cl_device_id deviceID, cl_context context stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); - error = clEnqueueWriteBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event); - test_error( error, "Unable to set testing kernel data" ); + error = clEnqueueWriteBuffer(queue, stream, CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, + (void *)testArray, 0, NULL, &event); + test_error(error, "Unable to set testing kernel data"); /* Now wait for it to be done */ - error = clWaitForEvents( 1, &event ); - test_error( error, "Unable to wait for event" ); - - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus to wait for event completion failed" ); - if( status != CL_COMPLETE ) + error = clWaitForEvents(1, &event); + test_error(error, "Unable to wait for event"); + + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, + "Calling clGetEventStatus to wait for event completion failed"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array write complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - clReleaseMemObject( stream ); - clReleaseEvent( event ); + clReleaseMemObject(stream); + clReleaseEvent(event); return 0; } -int test_event_get_read_array_status( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_get_read_array_status(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_mem 
stream; - cl_float testArray[ 1024 * 32 ]; + cl_float testArray[1024 * 32]; cl_event event; int error; cl_int status; @@ -217,58 +258,72 @@ int test_event_get_read_array_status( cl_device_id deviceID, cl_context context, stream = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); - error = clEnqueueReadBuffer(queue, stream, CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)testArray, 0, NULL, &event); - test_error( error, "Unable to get testing kernel data" ); + error = clEnqueueReadBuffer(queue, stream, CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, (void *)testArray, + 0, NULL, &event); + test_error(error, "Unable to get testing kernel data"); /* It should still be running... */ - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); - if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED + && status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "during array read (%d:%s)\n", + status, IGetStatusString(status)); return -1; } /* Now wait for it to be done */ - error = clWaitForEvents( 1, &event ); - test_error( error, "Unable to wait for event" ); - - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus to wait for event completion failed" ); - if( status != CL_COMPLETE ) 
+ error = clWaitForEvents(1, &event); + test_error(error, "Unable to wait for event"); + + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, + "Calling clGetEventStatus to wait for event completion failed"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array read complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - clReleaseMemObject( stream ); - clReleaseEvent( event ); + clReleaseMemObject(stream); + clReleaseEvent(event); return 0; } /* clGetEventStatus not implemented yet */ -int test_event_wait_for_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_wait_for_execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int status; - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); /* Now we wait for it to be done, then test the status again */ - error = clWaitForEvents( 1, &event ); - test_error( error, "Unable to wait for execute event" ); + error = clWaitForEvents(1, &event); + test_error(error, "Unable to wait for execute event"); /* Make sure it worked */ - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_COMPLETE ) + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after event complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } @@ -276,11 +331,12 @@ int test_event_wait_for_execute( cl_device_id deviceID, cl_context context, cl_c return 0; } -int test_event_wait_for_array( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_wait_for_array(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_mem streams[2]; - cl_float readArray[ 1024 * 32 ]; - cl_float writeArray[ 1024 * 32 ]; + cl_float readArray[1024 * 32]; + cl_float writeArray[1024 * 32]; cl_event events[2]; int error; cl_int status; @@ -288,128 +344,155 @@ int test_event_wait_for_array( cl_device_id deviceID, cl_context context, cl_com streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); - error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]); - test_error( error, "Unable to read testing kernel data" ); + error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, (void *)readArray, + 0, NULL, &events[0]); + test_error(error, "Unable to read testing kernel data"); - error = 
clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)writeArray, 0, NULL, &events[1]); - test_error( error, "Unable to write testing kernel data" ); + error = clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, + (void *)writeArray, 0, NULL, &events[1]); + test_error(error, "Unable to write testing kernel data"); /* Both should still be running */ - error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); - if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED + && status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "during array read (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED + && status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array write (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "during array write (%d:%s)\n", + status, IGetStatusString(status)); return -1; } /* Now try waiting for both */ - error = clWaitForEvents( 2, events ); - test_error( error, "Unable to wait for array events" ); + error = clWaitForEvents(2, events); + test_error(error, "Unable to wait for array events"); /* Double check status on both */ - error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); - if( status != CL_COMPLETE ) + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array read complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_COMPLETE ) + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array write complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - clReleaseMemObject( streams[0] ); - clReleaseMemObject( streams[1] ); - clReleaseEvent( events[0] ); - clReleaseEvent( events[1] ); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseEvent(events[0]); + clReleaseEvent(events[1]); return 0; } -int test_event_flush( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_flush(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { int loopCount = 0; cl_int status; - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); - /* Now flush. Note that we can't guarantee this actually lets the op finish, but we can guarantee it's no longer queued */ - error = clFlush( queue ); - test_error( error, "Unable to flush events" ); + /* Now flush. Note that we can't guarantee this actually lets the op finish, + * but we can guarantee it's no longer queued */ + error = clFlush(queue); + test_error(error, "Unable to flush events"); /* Make sure it worked */ - while (1) { - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, - sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); + while (1) + { + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); - if( status != CL_QUEUED ) - break; + if (status != CL_QUEUED) break; -#if ! 
defined( _WIN32 ) +#if !defined(_WIN32) sleep(1); // give it some time here. #else // _WIN32 - Sleep(1000); + Sleep(1000); #endif ++loopCount; - } - -/* -CL_QUEUED (command has been enqueued in the command-queue), -CL_SUBMITTED (enqueued command has been submitted by the host to the device associated with the command-queue), -CL_RUNNING (device is currently executing this command), -CL_COMPLETE (the command has completed), or -Error code given by a negative integer value. (command was abnormally terminated – this may be caused by a bad memory access etc.). -*/ - if(status != CL_COMPLETE && status != CL_SUBMITTED && - status != CL_RUNNING && status != CL_COMPLETE) - { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event flush (%d:%s)\n", status, IGetStatusString( status ) ); + } + + /* + CL_QUEUED (command has been enqueued in the command-queue), + CL_SUBMITTED (enqueued command has been submitted by the host to the device + associated with the command-queue), CL_RUNNING (device is currently + executing this command), CL_COMPLETE (the command has completed), or Error + code given by a negative integer value. (command was abnormally terminated – + this may be caused by a bad memory access etc.). 
+ */ + if (status != CL_COMPLETE && status != CL_SUBMITTED && status != CL_RUNNING + && status != CL_COMPLETE) + { + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after event flush (%d:%s)\n", + status, IGetStatusString(status)); return -1; } /* Now wait */ - error = clFinish( queue ); - test_error( error, "Unable to finish events" ); + error = clFinish(queue); + test_error(error, "Unable to finish events"); FINISH_EVENT(queue); return 0; } -int test_event_finish_execute( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_finish_execute(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int status; - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); /* Now flush and finish all ops */ - error = clFinish( queue ); - test_error( error, "Unable to finish all events" ); + error = clFinish(queue); + test_error(error, "Unable to finish all events"); /* Make sure it worked */ - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_COMPLETE ) + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after event complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } @@ -417,11 +500,12 @@ int test_event_finish_execute( cl_device_id deviceID, cl_context context, cl_com return 0; } -int test_event_finish_array( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_finish_array(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_mem streams[2]; - cl_float readArray[ 1024 * 32 ]; - cl_float writeArray[ 1024 * 32 ]; + cl_float readArray[1024 * 32]; + cl_float writeArray[1024 * 32]; cl_event events[2]; int error; cl_int status; @@ -429,59 +513,77 @@ int test_event_finish_array( cl_device_id deviceID, cl_context context, cl_comma streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float) * 1024 * 32, NULL, &error); - test_error( error, "Creating test array failed" ); + test_error(error, "Creating test array failed"); - error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)readArray, 0, NULL, &events[0]); - test_error( error, "Unable to read testing kernel data" ); + error = clEnqueueReadBuffer(queue, streams[0], CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, (void *)readArray, + 0, NULL, &events[0]); + test_error(error, "Unable to read testing kernel data"); - error = 
clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, sizeof(cl_float)*1024*32, (void *)writeArray, 0, NULL, &events[1]); - test_error( error, "Unable to write testing kernel data" ); + error = clEnqueueWriteBuffer(queue, streams[1], CL_FALSE, 0, + sizeof(cl_float) * 1024 * 32, + (void *)writeArray, 0, NULL, &events[1]); + test_error(error, "Unable to write testing kernel data"); /* Both should still be running */ - error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); - if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED + && status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array read (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "during array read (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED && status != CL_COMPLETE) + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_RUNNING && status != CL_QUEUED && status != CL_SUBMITTED + && status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus during array write (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "during array write (%d:%s)\n", + status, IGetStatusString(status)); return -1; } /* Now try finishing all ops */ - error = clFinish( queue ); - test_error( error, "Unable to finish all events" ); + error = clFinish(queue); + test_error(error, "Unable to finish all events"); /* Double check status on both */ - error = clGetEventInfo( events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" ); - if( status != CL_COMPLETE ) + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array read complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array read complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - error = clGetEventInfo( events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventStatus didn't work!" 
); - if( status != CL_COMPLETE ) + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventStatus didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetErrorStatus after array write complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetErrorStatus " + "after array write complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } - clReleaseMemObject( streams[0] ); - clReleaseMemObject( streams[1] ); - clReleaseEvent( events[0] ); - clReleaseEvent( events[1] ); + clReleaseMemObject(streams[0]); + clReleaseMemObject(streams[1]); + clReleaseEvent(events[0]); + clReleaseEvent(events[1]); return 0; } @@ -489,7 +591,8 @@ int test_event_finish_array( cl_device_id deviceID, cl_context context, cl_comma #define NUM_EVENT_RUNS 100 -int test_event_release_before_done( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_release_before_done(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { // Create a kernel to run clProgramWrapper program; @@ -501,21 +604,24 @@ int test_event_release_before_done( cl_device_id deviceID, cl_context context, c int error, i; // Create a kernel - if( create_single_kernel_helper( context, &program, &kernel[0], 1, sample_long_test_kernel, "sample_test" ) ) + if (create_single_kernel_helper(context, &program, &kernel[0], 1, + sample_long_test_kernel, "sample_test")) { return -1; } - for( i = 1; i < NUM_EVENT_RUNS; i++ ) { - kernel[i] = clCreateKernel(program, "sample_test", &error); - test_error(error, "Unable to create kernel"); - } + for (i = 1; i < NUM_EVENT_RUNS; i++) + { + kernel[i] = clCreateKernel(program, "sample_test", &error); + test_error(error, "Unable to create kernel"); + } - error = get_max_common_work_group_size( context, kernel[0], 1024, 
&threads[0] ); - test_error( error, "Unable to get work group size to use" ); + error = + get_max_common_work_group_size(context, kernel[0], 1024, &threads[0]); + test_error(error, "Unable to get work group size to use"); // Create a set of streams to use as arguments - for( i = 0; i < NUM_EVENT_RUNS; i++ ) + for (i = 0; i < NUM_EVENT_RUNS; i++) { streams[i][0] = clCreateBuffer(context, CL_MEM_READ_WRITE, @@ -523,77 +629,89 @@ int test_event_release_before_done( cl_device_id deviceID, cl_context context, c streams[i][1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int) * threads[0], NULL, &error); - if( ( streams[i][0] == NULL ) || ( streams[i][1] == NULL ) ) + if ((streams[i][0] == NULL) || (streams[i][1] == NULL)) { - log_error( "ERROR: Unable to allocate testing streams" ); + log_error("ERROR: Unable to allocate testing streams"); return -1; } } - // Execute the kernels one by one, hopefully making sure they won't be done by the time we get to the end - for( i = 0; i < NUM_EVENT_RUNS; i++ ) + // Execute the kernels one by one, hopefully making sure they won't be done + // by the time we get to the end + for (i = 0; i < NUM_EVENT_RUNS; i++) { - error = clSetKernelArg( kernel[i], 0, sizeof( cl_mem ), &streams[i][0] ); - error |= clSetKernelArg( kernel[i], 1, sizeof( cl_mem ), &streams[i][1] ); - test_error( error, "Unable to set kernel arguments" ); + error = clSetKernelArg(kernel[i], 0, sizeof(cl_mem), &streams[i][0]); + error |= clSetKernelArg(kernel[i], 1, sizeof(cl_mem), &streams[i][1]); + test_error(error, "Unable to set kernel arguments"); - error = clEnqueueNDRangeKernel( queue, kernel[i], 1, NULL, threads, threads, 0, NULL, &events[i]); - test_error( error, "Unable to execute test kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, + threads, 0, NULL, &events[i]); + test_error(error, "Unable to execute test kernel"); } // Free all but the last event - for( i = 0; i < NUM_EVENT_RUNS - 1; i++ ) + for (i = 0; i < 
NUM_EVENT_RUNS - 1; i++) { - clReleaseEvent( events[ i ] ); + clReleaseEvent(events[i]); } // Get status on the last one, then free it - error = clGetEventInfo( events[ NUM_EVENT_RUNS - 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Unable to get event status" ); + error = clGetEventInfo(events[NUM_EVENT_RUNS - 1], + CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status), + &status, NULL); + test_error(error, "Unable to get event status"); - clReleaseEvent( events[ NUM_EVENT_RUNS - 1 ] ); + clReleaseEvent(events[NUM_EVENT_RUNS - 1]); // Was the status still-running? - if( status == CL_COMPLETE ) + if (status == CL_COMPLETE) { - log_info( "WARNING: Events completed before they could be released, so test is a null-op. Increase workload and try again." ); + log_info("WARNING: Events completed before they could be released, so " + "test is a null-op. Increase workload and try again."); } - else if( status == CL_RUNNING || status == CL_QUEUED || status == CL_SUBMITTED ) + else if (status == CL_RUNNING || status == CL_QUEUED + || status == CL_SUBMITTED) { - log_info( "Note: Event status was running or queued when released, so test was good.\n" ); + log_info("Note: Event status was running or queued when released, so " + "test was good.\n"); } // If we didn't crash by now, the test succeeded - clFinish( queue ); + clFinish(queue); return 0; } -int test_event_enqueue_marker( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_enqueue_marker(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int status; - SETUP_EVENT( context, queue ); + SETUP_EVENT(context, queue); - /* Now we queue a marker and wait for that, which--since it queues afterwards--should guarantee the execute finishes too */ + /* Now we queue a marker and wait for that, which--since it queues + * afterwards--should guarantee the execute finishes too */ clEventWrapper 
markerEvent; - //error = clEnqueueMarker( queue, &markerEvent ); + // error = clEnqueueMarker( queue, &markerEvent ); #ifdef CL_VERSION_1_2 - error = clEnqueueMarkerWithWaitList(queue, 0, NULL, &markerEvent ); + error = clEnqueueMarkerWithWaitList(queue, 0, NULL, &markerEvent); #else - error = clEnqueueMarker( queue, &markerEvent ); + error = clEnqueueMarker(queue, &markerEvent); #endif - test_error( error, "Unable to queue marker" ); + test_error(error, "Unable to queue marker"); /* Now we wait for it to be done, then test the status again */ - error = clWaitForEvents( 1, &markerEvent ); - test_error( error, "Unable to wait for marker event" ); + error = clWaitForEvents(1, &markerEvent); + test_error(error, "Unable to wait for marker event"); /* Check the status of the first event */ - error = clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status ), &status, NULL ); - test_error( error, "Calling clGetEventInfo didn't work!" ); - if( status != CL_COMPLETE ) + error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status), &status, NULL); + test_error(error, "Calling clGetEventInfo didn't work!"); + if (status != CL_COMPLETE) { - log_error( "ERROR: Incorrect status returned from clGetEventInfo after event complete (%d:%s)\n", status, IGetStatusString( status ) ); + log_error("ERROR: Incorrect status returned from clGetEventInfo after " + "event complete (%d:%s)\n", + status, IGetStatusString(status)); return -1; } @@ -602,81 +720,101 @@ int test_event_enqueue_marker( cl_device_id deviceID, cl_context context, cl_com } #ifdef CL_VERSION_1_2 -int test_event_enqueue_marker_with_event_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_enqueue_marker_with_event_list(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - SETUP_EVENT( context, queue ); - cl_event event_list[3]={ NULL, NULL, NULL}; + SETUP_EVENT(context, queue); + cl_event 
event_list[3] = { NULL, NULL, NULL }; - size_t threads[1] = { 10 }, localThreads[1]={1}; - cl_uint event_count=2; - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); - test_error( error, " clEnqueueMarkerWithWaitList 1 " ); + size_t threads[1] = { 10 }, localThreads[1] = { 1 }; + cl_uint event_count = 2; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[0]); + test_error(error, " clEnqueueMarkerWithWaitList 1 "); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); - test_error( error, " clEnqueueMarkerWithWaitList 2" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[1]); + test_error(error, " clEnqueueMarkerWithWaitList 2"); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, NULL); - test_error( error, " clEnqueueMarkerWithWaitList 3" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, NULL); + test_error(error, " clEnqueueMarkerWithWaitList 3"); // test the case event returned - error =clEnqueueMarkerWithWaitList(queue, event_count, event_list, &event_list[2]); - test_error( error, " clEnqueueMarkerWithWaitList " ); + error = clEnqueueMarkerWithWaitList(queue, event_count, event_list, + &event_list[2]); + test_error(error, " clEnqueueMarkerWithWaitList "); error = clReleaseEvent(event_list[0]); error |= clReleaseEvent(event_list[1]); - test_error( error, "clReleaseEvent" ); + test_error(error, "clReleaseEvent"); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); - test_error( error, " clEnqueueMarkerWithWaitList 1 -1 " ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[0]); + test_error(error, " clEnqueueMarkerWithWaitList 1 -1 "); - error= clEnqueueNDRangeKernel( 
queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); - test_error( error, " clEnqueueMarkerWithWaitList 2-2" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[1]); + test_error(error, " clEnqueueMarkerWithWaitList 2-2"); - // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error : clEnqueueMarkerWithWaitList failed: event is a NULL value - error =clEnqueueMarkerWithWaitList(queue, event_count, event_list, NULL); - test_error( error, " clEnqueueMarkerWithWaitList " ); + // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error : + // clEnqueueMarkerWithWaitList failed: event is a NULL value + error = clEnqueueMarkerWithWaitList(queue, event_count, event_list, NULL); + test_error(error, " clEnqueueMarkerWithWaitList "); error = clReleaseEvent(event_list[0]); error |= clReleaseEvent(event_list[1]); error |= clReleaseEvent(event_list[2]); - test_error( error, "clReleaseEvent" ); + test_error(error, "clReleaseEvent"); FINISH_EVENT(queue); return 0; } -int test_event_enqueue_barrier_with_event_list( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_event_enqueue_barrier_with_event_list(cl_device_id deviceID, + cl_context context, + cl_command_queue queue, + int num_elements) { - SETUP_EVENT( context, queue ); - cl_event event_list[3]={ NULL, NULL, NULL}; + SETUP_EVENT(context, queue); + cl_event event_list[3] = { NULL, NULL, NULL }; - size_t threads[1] = { 10 }, localThreads[1]={1}; - cl_uint event_count=2; - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); - test_error( error, " clEnqueueBarrierWithWaitList 1 " ); + size_t threads[1] = { 10 }, localThreads[1] = { 1 }; + cl_uint event_count = 2; + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[0]); + test_error(error, " clEnqueueBarrierWithWaitList 1 "); - error= 
clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); - test_error( error, " clEnqueueBarrierWithWaitList 2" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[1]); + test_error(error, " clEnqueueBarrierWithWaitList 2"); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, NULL); - test_error( error, " clEnqueueBarrierWithWaitList 20" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, NULL); + test_error(error, " clEnqueueBarrierWithWaitList 20"); // test the case event returned - error =clEnqueueBarrierWithWaitList(queue, event_count, event_list, &event_list[2]); - test_error( error, " clEnqueueBarrierWithWaitList " ); + error = clEnqueueBarrierWithWaitList(queue, event_count, event_list, + &event_list[2]); + test_error(error, " clEnqueueBarrierWithWaitList "); clReleaseEvent(event_list[0]); clReleaseEvent(event_list[1]); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[0]); - test_error( error, " clEnqueueBarrierWithWaitList 1 " ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[0]); + test_error(error, " clEnqueueBarrierWithWaitList 1 "); - error= clEnqueueNDRangeKernel( queue,kernel,1,NULL, threads, localThreads, 0, NULL, &event_list[1]); - test_error( error, " clEnqueueBarrierWithWaitList 2" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, + localThreads, 0, NULL, &event_list[1]); + test_error(error, " clEnqueueBarrierWithWaitList 2"); - // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error : clEnqueueMarkerWithWaitList failed: event is a NULL value - error = clEnqueueBarrierWithWaitList(queue, event_count, event_list, NULL); - test_error( error, " clEnqueueBarrierWithWaitList " ); + // test the case event =NULL, caused [CL_INVALID_VALUE] : OpenCL Error 
: + // clEnqueueMarkerWithWaitList failed: event is a NULL value + error = clEnqueueBarrierWithWaitList(queue, event_count, event_list, NULL); + test_error(error, " clEnqueueBarrierWithWaitList "); clReleaseEvent(event_list[0]); clReleaseEvent(event_list[1]); diff --git a/test_conformance/events/test_userevents.cpp b/test_conformance/events/test_userevents.cpp index 0a4954f9..1fdb4ea4 100644 --- a/test_conformance/events/test_userevents.cpp +++ b/test_conformance/events/test_userevents.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -14,11 +14,11 @@ // limitations under the License. // #if defined(__APPLE__) - #include - #include +#include +#include #else - #include - #include +#include +#include #endif #include #include @@ -29,189 +29,261 @@ // CL error checking. #if defined(_MSC_VER) -#define CL_EXIT_ERROR(cmd,...) \ -{ \ -if ((cmd) != CL_SUCCESS) { \ -log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);\ -log_error(## __VA_ARGS__ );\ -log_error("\n");\ -return -1;\ -}\ -} +#define CL_EXIT_ERROR(cmd, ...) \ + { \ + if ((cmd) != CL_SUCCESS) \ + { \ + log_error("CL ERROR: %s %u: ", __FILE__, __LINE__); \ + log_error(##__VA_ARGS__); \ + log_error("\n"); \ + return -1; \ + } \ + } #else -#define CL_EXIT_ERROR(cmd,format,...) \ -{ \ -if ((cmd) != CL_SUCCESS) { \ -log_error("CL ERROR: %s %u: ", __FILE__,__LINE__);\ -log_error(format,## __VA_ARGS__ );\ -log_error("\n");\ -return -1;\ -}\ -} -#endif - -#define CL_EXIT_BUILD_ERROR(cmd,program,format,...) 
\ -{ \ -if ((cmd) != CL_SUCCESS) { \ -cl_uint num_devices_;\ -clGetProgramInfo(program,CL_PROGRAM_NUM_DEVICES,sizeof(num_devices_),&num_devices_,NULL);\ -cl_device_id *device_list;\ -device_list=(cl_device_id *)malloc(num_devices_*sizeof(cl_device_id));\ -clGetProgramInfo(program,CL_PROGRAM_DEVICES,num_devices_*sizeof(cl_device_id),device_list,NULL);\ -for (unsigned i=0;i= CL_SUBMITTED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status before user event",i); +#define CL_EXIT_ERROR(cmd, format, ...) \ + { \ + if ((cmd) != CL_SUCCESS) \ + { \ + log_error("CL ERROR: %s %u: ", __FILE__, __LINE__); \ + log_error(format, ##__VA_ARGS__); \ + log_error("\n"); \ + return -1; \ + } \ } +#endif - log_info("Setting user event status to complete\n"); - CL_EXIT_ERROR(clSetUserEventStatus(u1,CL_COMPLETE),"clSetUserEventStatus failed"); - - log_info("Waiting for tasks to finish executing\n"); - CL_EXIT_ERROR(clWaitForEvents( 1, &e[N-1] ),"clWaitForEvent failed"); - - log_info("Checking task status after setting user event status\n"); - for (cl_uint i = 0; i != N; ++i) { - CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed"); - CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %04x after successful user event",i,s); +#define CL_EXIT_BUILD_ERROR(cmd, program, format, ...) 
\ + { \ + if ((cmd) != CL_SUCCESS) \ + { \ + cl_uint num_devices_; \ + clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, \ + sizeof(num_devices_), &num_devices_, NULL); \ + cl_device_id *device_list; \ + device_list = \ + (cl_device_id *)malloc(num_devices_ * sizeof(cl_device_id)); \ + clGetProgramInfo(program, CL_PROGRAM_DEVICES, \ + num_devices_ * sizeof(cl_device_id), device_list, \ + NULL); \ + for (unsigned i = 0; i < num_devices_; ++i) \ + { \ + size_t len; \ + char buffer[2048]; \ + clGetProgramBuildInfo(program, device_list[i], \ + CL_PROGRAM_BUILD_LOG, sizeof(buffer), \ + buffer, &len); \ + log_error("DEVICE %u CL BUILD ERROR: %s(%u): ", i, __FILE__, \ + __LINE__); \ + log_error(format, ##__VA_ARGS__); \ + log_error("\n"); \ + } \ + free(device_list); \ + return -1; \ + } \ } - CL_EXIT_ERROR(clReleaseEvent(u1),"clReleaseEvent failed"); - - for (cl_uint i = 0; i != N; ++i) - CL_EXIT_ERROR(clReleaseEvent(e[i]),"clReleaseEvent failed"); - - log_info("Successful user event case passed.\n"); - - } +const char *src[] = { "__kernel void simple_task(__global float* output) {\n" + " output[0] += 1;\n" + "}\n" }; - // Test unsuccessful user event case. /////////////////////////////////////////////////////////////////// - { - cl_event u2 = clCreateUserEvent( context, &err ); - CL_EXIT_ERROR(err,"clCreateUserEvent failed"); - - cl_event e[4]; - cl_uint N = sizeof e / sizeof(cl_event); +enum +{ + MaxDevices = 8 +}; - log_info("Enqueuing tasks\n"); - for (cl_uint i = 0; i != N; ++i) - CL_EXIT_ERROR(clEnqueueTask(queue,k0,1,&u2,&e[i]),"clEnqueueTaskFailed"); +int test_userevents(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) +{ - log_info("Checking task status before setting user event status\n"); - for (cl_uint i = 0; i != N; ++i) { - CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed"); - CL_EXIT_ERROR((s == CL_QUEUED || s == CL_SUBMITTED) ? 
CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %d before user event",i, (int) s); + cl_int err; + + cl_event u1 = clCreateUserEvent(context, &err); + CL_EXIT_ERROR(err, "clCreateUserEvent failed"); + + // Test event properties. + cl_int s; + size_t sizeofs; + CL_EXIT_ERROR(clGetEventInfo(u1, CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof s, &s, &sizeofs), + "clGetEventInfo failed"); + CL_EXIT_ERROR((sizeof s == sizeofs) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong size for " + "CL_EVENT_COMMAND_EXECUTION_STATUS"); + CL_EXIT_ERROR((s == CL_SUBMITTED) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong value for " + "CL_EVENT_COMMAND_EXECUTION_STATUS"); + + cl_command_type t; + size_t sizeoft; + CL_EXIT_ERROR( + clGetEventInfo(u1, CL_EVENT_COMMAND_TYPE, sizeof t, &t, &sizeoft), + "clGetEventInfo failed"); + CL_EXIT_ERROR( + (sizeof t == sizeoft) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong size for CL_EVENT_COMMAND_TYPE"); + CL_EXIT_ERROR( + (t == CL_COMMAND_USER) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong value for CL_EVENT_COMMAND_TYPE"); + + cl_command_queue q; + size_t sizeofq; + CL_EXIT_ERROR( + clGetEventInfo(u1, CL_EVENT_COMMAND_QUEUE, sizeof q, &q, &sizeofq), + "clGetEventInfo failed"); + CL_EXIT_ERROR( + (sizeof q == sizeofq) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong size for CL_EVENT_COMMAND_QUEUE"); + CL_EXIT_ERROR( + (q == NULL) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong value for CL_EVENT_COMMAND_QUEUE"); + + cl_context c; + size_t sizeofc; + CL_EXIT_ERROR(clGetEventInfo(u1, CL_EVENT_CONTEXT, sizeof c, &c, &sizeofc), + "clGetEventInfo failed"); + CL_EXIT_ERROR((sizeof c == sizeofc) ? CL_SUCCESS : -1, + "clGetEventInfo returned wrong size for CL_EVENT_CONTEXT"); + CL_EXIT_ERROR((c == context) ? 
CL_SUCCESS : -1, + "clGetEventInfo returned wrong value for CL_EVENT_CONTEXT"); + + cl_ulong p; + err = clGetEventProfilingInfo(u1, CL_PROFILING_COMMAND_QUEUED, sizeof p, &p, + 0); + CL_EXIT_ERROR((err != CL_SUCCESS) ? CL_SUCCESS : -1, + "clGetEventProfilingInfo returned wrong error."); + + // Test semantics. + cl_program program; + err = create_single_kernel_helper_create_program(context, &program, 1, src); + CL_EXIT_ERROR(err, "clCreateProgramWithSource failed"); + + CL_EXIT_BUILD_ERROR(clBuildProgram(program, 0, NULL, "", NULL, NULL), + program, "Building program from inline src:\t%s", + src[0]); + + cl_kernel k0 = clCreateKernel(program, "simple_task", &err); + CL_EXIT_ERROR(err, "clCreateKernel failed"); + + float buffer[1]; + cl_mem output = clCreateBuffer(context, CL_MEM_USE_HOST_PTR, sizeof buffer, + buffer, &err); + CL_EXIT_ERROR(err, "clCreateBuffer failed."); + + CL_EXIT_ERROR(clSetKernelArg(k0, 0, sizeof(output), &output), + "clSetKernelArg failed"); + + + // Successful case. + // ////////////////////////////////////////////////////////////////////////////////////// + { + cl_event e[4]; + cl_uint N = sizeof e / sizeof(cl_event); + + log_info("Enqueuing tasks\n"); + for (cl_uint i = 0; i != N; ++i) + CL_EXIT_ERROR(clEnqueueTask(queue, k0, 1, &u1, &e[i]), + "clEnqueueTaskFailed"); + + log_info("Checking task status before setting user event status\n"); + for (cl_uint i = 0; i != N; ++i) + { + CL_EXIT_ERROR(clGetEventInfo(e[i], + CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof s, &s, 0), + "clGetEventInfo failed"); + CL_EXIT_ERROR( + (s >= CL_SUBMITTED) ? 
CL_SUCCESS : -1, + "clGetEventInfo %u returned wrong status before user event", i); + } + + log_info("Setting user event status to complete\n"); + CL_EXIT_ERROR(clSetUserEventStatus(u1, CL_COMPLETE), + "clSetUserEventStatus failed"); + + log_info("Waiting for tasks to finish executing\n"); + CL_EXIT_ERROR(clWaitForEvents(1, &e[N - 1]), "clWaitForEvent failed"); + + log_info("Checking task status after setting user event status\n"); + for (cl_uint i = 0; i != N; ++i) + { + CL_EXIT_ERROR(clGetEventInfo(e[i], + CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof s, &s, 0), + "clGetEventInfo failed"); + CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1, + "clGetEventInfo %u returned wrong status %04x after " + "successful user event", + i, s); + } + + CL_EXIT_ERROR(clReleaseEvent(u1), "clReleaseEvent failed"); + + for (cl_uint i = 0; i != N; ++i) + CL_EXIT_ERROR(clReleaseEvent(e[i]), "clReleaseEvent failed"); + + log_info("Successful user event case passed.\n"); } - log_info("Setting user event status to unsuccessful result\n"); - CL_EXIT_ERROR(clSetUserEventStatus(u2,-1),"clSetUserEventStatus failed"); - - log_info("Waiting for tasks to finish executing\n"); - CL_EXIT_ERROR((clWaitForEvents( N, &e[0] )!=CL_SUCCESS) ? CL_SUCCESS : -1,"clWaitForEvent succeeded when it should have failed"); - - log_info("Checking task status after setting user event status\n"); - for (cl_uint i = 0; i != N; ++i) { - CL_EXIT_ERROR(clGetEventInfo(e[i],CL_EVENT_COMMAND_EXECUTION_STATUS,sizeof s,&s,0),"clGetEventInfo failed"); - CL_EXIT_ERROR((s != CL_QUEUED) ? CL_SUCCESS : -1,"clGetEventInfo %u returned wrong status %04x after unsuccessful user event",i,s); + // Test unsuccessful user event case. 
+ // /////////////////////////////////////////////////////////////////// + { + cl_event u2 = clCreateUserEvent(context, &err); + CL_EXIT_ERROR(err, "clCreateUserEvent failed"); + + cl_event e[4]; + cl_uint N = sizeof e / sizeof(cl_event); + + log_info("Enqueuing tasks\n"); + for (cl_uint i = 0; i != N; ++i) + CL_EXIT_ERROR(clEnqueueTask(queue, k0, 1, &u2, &e[i]), + "clEnqueueTaskFailed"); + + log_info("Checking task status before setting user event status\n"); + for (cl_uint i = 0; i != N; ++i) + { + CL_EXIT_ERROR(clGetEventInfo(e[i], + CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof s, &s, 0), + "clGetEventInfo failed"); + CL_EXIT_ERROR( + (s == CL_QUEUED || s == CL_SUBMITTED) ? CL_SUCCESS : -1, + "clGetEventInfo %u returned wrong status %d before user event", + i, (int)s); + } + + log_info("Setting user event status to unsuccessful result\n"); + CL_EXIT_ERROR(clSetUserEventStatus(u2, -1), + "clSetUserEventStatus failed"); + + log_info("Waiting for tasks to finish executing\n"); + CL_EXIT_ERROR((clWaitForEvents(N, &e[0]) != CL_SUCCESS) ? CL_SUCCESS + : -1, + "clWaitForEvent succeeded when it should have failed"); + + log_info("Checking task status after setting user event status\n"); + for (cl_uint i = 0; i != N; ++i) + { + CL_EXIT_ERROR(clGetEventInfo(e[i], + CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof s, &s, 0), + "clGetEventInfo failed"); + CL_EXIT_ERROR((s != CL_QUEUED) ? 
CL_SUCCESS : -1, + "clGetEventInfo %u returned wrong status %04x after " + "unsuccessful user event", + i, s); + } + + CL_EXIT_ERROR(clReleaseEvent(u2), "clReleaseEvent failed"); + + for (cl_uint i = 0; i != N; ++i) + CL_EXIT_ERROR(clReleaseEvent(e[i]), "clReleaseEvent failed"); + + log_info("Unsuccessful user event case passed.\n"); } - CL_EXIT_ERROR(clReleaseEvent(u2),"clReleaseEvent failed"); - - for (cl_uint i = 0; i != N; ++i) - CL_EXIT_ERROR(clReleaseEvent(e[i]),"clReleaseEvent failed"); - - log_info("Unsuccessful user event case passed.\n"); - } - - clReleaseKernel(k0); - clReleaseProgram(program); - clReleaseMemObject(output); - - return 0; + clReleaseKernel(k0); + clReleaseProgram(program); + clReleaseMemObject(output); + return 0; } - diff --git a/test_conformance/events/test_userevents_multithreaded.cpp b/test_conformance/events/test_userevents_multithreaded.cpp index 51ef2226..a7845bf1 100644 --- a/test_conformance/events/test_userevents_multithreaded.cpp +++ b/test_conformance/events/test_userevents_multithreaded.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -19,8 +19,8 @@ #include -#if !defined (_MSC_VER) - #include +#if !defined(_MSC_VER) +#include #endif // !_MSC_VER void trigger_user_event(cl_event *event) @@ -30,44 +30,44 @@ void trigger_user_event(cl_event *event) clSetUserEventStatus(*event, CL_COMPLETE); } -int test_userevents_multithreaded( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements ) +int test_userevents_multithreaded(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { cl_int error; // Set up a user event to act as a gate - clEventWrapper gateEvent = clCreateUserEvent( context, &error ); - test_error( error, "Unable to create user gate event" ); + clEventWrapper gateEvent = clCreateUserEvent(context, &error); + test_error(error, "Unable to create user gate event"); // Set up a few actions gated on the user event NDRangeKernelAction action1; ReadBufferAction action2; WriteBufferAction action3; - clEventWrapper actionEvents[ 3 ]; - Action * actions[] = { &action1, &action2, &action3, NULL }; + clEventWrapper actionEvents[3]; + Action *actions[] = { &action1, &action2, &action3, NULL }; - for( int i = 0; actions[ i ] != NULL; i++ ) + for (int i = 0; actions[i] != NULL; i++) { - error = actions[ i ]->Setup( deviceID, context, queue ); - test_error( error, "Unable to set up test action" ); + error = actions[i]->Setup(deviceID, context, queue); + test_error(error, "Unable to set up test action"); - error = actions[ i ]->Execute( queue, 1, &gateEvent, &actionEvents[ i ] ); - test_error( error, "Unable to execute test action" ); + error = actions[i]->Execute(queue, 1, &gateEvent, &actionEvents[i]); + test_error(error, "Unable to execute test action"); } // Now, instead of releasing the gate, we spawn a separate thread to do so - log_info( "\tStarting trigger thread...\n" ); + log_info("\tStarting trigger thread...\n"); std::thread thread(trigger_user_event, &gateEvent); - log_info( "\tWaiting 
for actions...\n" ); - error = clWaitForEvents( 3, &actionEvents[ 0 ] ); - test_error( error, "Unable to wait for action events" ); + log_info("\tWaiting for actions...\n"); + error = clWaitForEvents(3, &actionEvents[0]); + test_error(error, "Unable to wait for action events"); thread.join(); - log_info( "\tActions completed.\n" ); + log_info("\tActions completed.\n"); // If we got here without error, we're good return 0; } - diff --git a/test_conformance/events/test_waitlists.cpp b/test_conformance/events/test_waitlists.cpp index ebf5da9b..6036451f 100644 --- a/test_conformance/events/test_waitlists.cpp +++ b/test_conformance/events/test_waitlists.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -17,15 +17,16 @@ #include "action_classes.h" -extern const char *IGetStatusString( cl_int status ); +extern const char *IGetStatusString(cl_int status); #define PRINT_OPS 0 -int test_waitlist( cl_device_id device, cl_context context, cl_command_queue queue, Action *actionToTest, bool multiple ) +int test_waitlist(cl_device_id device, cl_context context, + cl_command_queue queue, Action *actionToTest, bool multiple) { - NDRangeKernelAction actions[ 2 ]; - clEventWrapper events[ 3 ]; - cl_int status[ 3 ]; + NDRangeKernelAction actions[2]; + clEventWrapper events[3]; + cl_int status[3]; cl_int error; if (multiple) @@ -37,41 +38,43 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que "reference event 0 in its waitlist.\n"); // Set up the first base action to wait against - error = actions[ 0 ].Setup( device, context, queue ); - test_error( error, "Unable to setup base event to wait against" ); + error = actions[0].Setup(device, context, queue); + test_error(error, "Unable to setup base event to wait against"); - if( multiple ) + if (multiple) 
{ // Set up a second event to wait against - error = actions[ 1 ].Setup( device, context, queue ); - test_error( error, "Unable to setup second base event to wait against" ); + error = actions[1].Setup(device, context, queue); + test_error(error, "Unable to setup second base event to wait against"); } // Now set up the actual action to test - error = actionToTest->Setup( device, context, queue ); - test_error( error, "Unable to set up test event" ); + error = actionToTest->Setup(device, context, queue); + test_error(error, "Unable to set up test event"); // Execute all events now if (PRINT_OPS) log_info("\tExecuting action 0...\n"); - error = actions[ 0 ].Execute( queue, 0, NULL, &events[ 0 ] ); - test_error( error, "Unable to execute first event" ); + error = actions[0].Execute(queue, 0, NULL, &events[0]); + test_error(error, "Unable to execute first event"); - if( multiple ) + if (multiple) { - if (PRINT_OPS) log_info("\tExecuting action 1...\n"); - error = actions[ 1 ].Execute( queue, 1, &events[0], &events[ 1 ] ); - test_error( error, "Unable to execute second event" ); + if (PRINT_OPS) log_info("\tExecuting action 1...\n"); + error = actions[1].Execute(queue, 1, &events[0], &events[1]); + test_error(error, "Unable to execute second event"); } // Sanity check if (multiple) { if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); - error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[1]), &status[1], NULL); test_error(error, "Unable to get event status"); } if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); - error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[0]), &status[0], NULL); test_error(error, "Unable to get event status"); 
log_info("\t\tEvent status after starting reference events: reference " @@ -79,28 +82,34 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que IGetStatusString(status[0]), (multiple ? IGetStatusString(status[1]) : "N/A"), "N/A"); - if( ( status[ 0 ] == CL_COMPLETE ) || ( multiple && status[ 1 ] == CL_COMPLETE ) ) + if ((status[0] == CL_COMPLETE) || (multiple && status[1] == CL_COMPLETE)) { - log_info( "WARNING: Reference event(s) already completed before we could execute test event! Possible that the reference event blocked (implicitly passing)\n" ); + log_info("WARNING: Reference event(s) already completed before we " + "could execute test event! Possible that the reference event " + "blocked (implicitly passing)\n"); return 0; } if (PRINT_OPS) log_info("\tExecuting action to test...\n"); - error = actionToTest->Execute( queue, ( multiple ) ? 2 : 1, &events[ 0 ], &events[ 2 ] ); - test_error( error, "Unable to execute test event" ); + error = actionToTest->Execute(queue, (multiple) ? 
2 : 1, &events[0], + &events[2]); + test_error(error, "Unable to execute test event"); // Hopefully, the first event is still running if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); - error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); - test_error( error, "Unable to get event status" ); + error = clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[2]), &status[2], NULL); + test_error(error, "Unable to get event status"); if (multiple) { if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); - error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[1]), &status[1], NULL); test_error(error, "Unable to get event status"); } if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); - error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[0]), &status[0], NULL); test_error(error, "Unable to get event status"); log_info("\t\tEvent status after starting test event: reference event 0: " @@ -109,12 +118,13 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que (multiple ? 
IGetStatusString(status[1]) : "N/A"), IGetStatusString(status[2])); - if( multiple ) + if (multiple) { - if( status[ 0 ] == CL_COMPLETE && status[ 1 ] == CL_COMPLETE ) + if (status[0] == CL_COMPLETE && status[1] == CL_COMPLETE) { - log_info( "WARNING: Both events completed, so unable to test further (implicitly passing).\n" ); - clFinish( queue ); + log_info("WARNING: Both events completed, so unable to test " + "further (implicitly passing).\n"); + clFinish(queue); return 0; } @@ -124,50 +134,59 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que "ERROR: Test failed because the second wait event is complete " "and the first is not.(status: 0: %s and 1: %s)\n", IGetStatusString(status[0]), IGetStatusString(status[1])); - clFinish( queue ); + clFinish(queue); return -1; } } else { - if( status[ 0 ] == CL_COMPLETE ) + if (status[0] == CL_COMPLETE) { - log_info( "WARNING: Reference event completed, so unable to test further (implicitly passing).\n" ); - clFinish( queue ); + log_info("WARNING: Reference event completed, so unable to test " + "further (implicitly passing).\n"); + clFinish(queue); return 0; } - if( status[ 0 ] != CL_RUNNING && status[ 0 ] != CL_QUEUED && status[ 0 ] != CL_SUBMITTED ) + if (status[0] != CL_RUNNING && status[0] != CL_QUEUED + && status[0] != CL_SUBMITTED) { - log_error( "ERROR: Test failed because first wait event is not currently running, queued, or submitted! (status: 0: %s)\n", IGetStatusString( status[ 0 ] ) ); - clFinish( queue ); + log_error( + "ERROR: Test failed because first wait event is not currently " + "running, queued, or submitted! (status: 0: %s)\n", + IGetStatusString(status[0])); + clFinish(queue); return -1; } } - if( status[ 2 ] != CL_QUEUED && status[ 2 ] != CL_SUBMITTED ) + if (status[2] != CL_QUEUED && status[2] != CL_SUBMITTED) { - log_error( "ERROR: Test event is not waiting to run! 
(status: 2: %s)\n", IGetStatusString( status[ 2 ] ) ); - clFinish( queue ); + log_error("ERROR: Test event is not waiting to run! (status: 2: %s)\n", + IGetStatusString(status[2])); + clFinish(queue); return -1; } // Now wait for the first reference event if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); - error = clWaitForEvents( 1, &events[ 0 ] ); - test_error( error, "Unable to wait for reference event" ); + error = clWaitForEvents(1, &events[0]); + test_error(error, "Unable to wait for reference event"); // Grab statuses again if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); - error = clGetEventInfo( events[ 2 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 2 ] ), &status[ 2 ], NULL ); - test_error( error, "Unable to get event status" ); + error = clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[2]), &status[2], NULL); + test_error(error, "Unable to get event status"); if (multiple) { if (PRINT_OPS) log_info("\tChecking status of action 1...\n"); - error = clGetEventInfo( events[ 1 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 1 ] ), &status[ 1 ], NULL ); + error = clGetEventInfo(events[1], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[1]), &status[1], NULL); test_error(error, "Unable to get event status"); } if (PRINT_OPS) log_info("\tChecking status of action 0...\n"); - error = clGetEventInfo( events[ 0 ], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof( status[ 0 ] ), &status[ 0 ], NULL ); + error = clGetEventInfo(events[0], CL_EVENT_COMMAND_EXECUTION_STATUS, + sizeof(status[0]), &status[0], NULL); test_error(error, "Unable to get event status"); log_info("\t\tEvent status after waiting for reference event 0: reference " @@ -177,15 +196,18 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que IGetStatusString(status[2])); // Sanity - if( status[ 0 ] != CL_COMPLETE ) + if (status[0] != CL_COMPLETE) { - log_error( "ERROR: Waited for first event but 
it's not complete (status: 0: %s)\n", IGetStatusString( status[ 0 ] ) ); - clFinish( queue ); + log_error("ERROR: Waited for first event but it's not complete " + "(status: 0: %s)\n", + IGetStatusString(status[0])); + clFinish(queue); return -1; } - // If we're multiple, and the second event isn't complete, then our test event should still be queued - if( multiple && status[ 1 ] != CL_COMPLETE ) + // If we're multiple, and the second event isn't complete, then our test + // event should still be queued + if (multiple && status[1] != CL_COMPLETE) { if (status[1] == CL_RUNNING && status[2] == CL_RUNNING) { @@ -193,17 +215,19 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que clFinish(queue); return -1; } - if( status[ 2 ] != CL_QUEUED && status[ 2 ] != CL_SUBMITTED ) + if (status[2] != CL_QUEUED && status[2] != CL_SUBMITTED) { - log_error( "ERROR: Test event did not wait for second event before starting! (status of ref: 1: %s, of test: 2: %s)\n", IGetStatusString( status[ 1 ] ), IGetStatusString( status[ 2 ] ) ); - clFinish( queue ); + log_error("ERROR: Test event did not wait for second event before " + "starting! 
(status of ref: 1: %s, of test: 2: %s)\n", + IGetStatusString(status[1]), IGetStatusString(status[2])); + clFinish(queue); return -1; } // Now wait for second event to complete, too if (PRINT_OPS) log_info("\tWaiting for action 1 to finish...\n"); - error = clWaitForEvents( 1, &events[ 1 ] ); - test_error( error, "Unable to wait for second reference event" ); + error = clWaitForEvents(1, &events[1]); + test_error(error, "Unable to wait for second reference event"); // Grab statuses again if (PRINT_OPS) log_info("\tChecking status of action to test 2...\n"); @@ -230,32 +254,38 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que IGetStatusString(status[2])); // Sanity - if( status[ 1 ] != CL_COMPLETE ) + if (status[1] != CL_COMPLETE) { - log_error( "ERROR: Waited for second reference event but it didn't complete (status: 1: %s)\n", IGetStatusString( status[ 1 ] ) ); - clFinish( queue ); + log_error("ERROR: Waited for second reference event but it didn't " + "complete (status: 1: %s)\n", + IGetStatusString(status[1])); + clFinish(queue); return -1; } } - // At this point, the test event SHOULD be running, but if it completed, we consider it a pass - if( status[ 2 ] == CL_COMPLETE ) + // At this point, the test event SHOULD be running, but if it completed, we + // consider it a pass + if (status[2] == CL_COMPLETE) { - log_info( "WARNING: Test event already completed. Assumed valid.\n" ); - clFinish( queue ); + log_info("WARNING: Test event already completed. Assumed valid.\n"); + clFinish(queue); return 0; } - if( status[ 2 ] != CL_RUNNING && status[ 2 ] != CL_SUBMITTED && status[ 2 ] != CL_QUEUED) + if (status[2] != CL_RUNNING && status[2] != CL_SUBMITTED + && status[2] != CL_QUEUED) { - log_error( "ERROR: Second event did not start running after reference event(s) completed! 
(status: 2: %s)\n", IGetStatusString( status[ 2 ] ) ); - clFinish( queue ); + log_error("ERROR: Second event did not start running after reference " + "event(s) completed! (status: 2: %s)\n", + IGetStatusString(status[2])); + clFinish(queue); return -1; } // Wait for the test event, then return if (PRINT_OPS) log_info("\tWaiting for action 2 to test to finish...\n"); - error = clWaitForEvents( 1, &events[ 2 ] ); - test_error( error, "Unable to wait for test event" ); + error = clWaitForEvents(1, &events[2]); + test_error(error, "Unable to wait for test event"); error |= clGetEventInfo(events[2], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(status[2]), &status[2], NULL); @@ -280,74 +310,81 @@ int test_waitlist( cl_device_id device, cl_context context, cl_command_queue que return 0; } -#define TEST_ACTION( name ) \ - { \ - name##Action action; \ - log_info( "-- Testing " #name " (waiting on 1 event)...\n" ); \ - if( ( error = test_waitlist( deviceID, context, queue, &action, false ) ) != CL_SUCCESS ) \ - retVal++; \ - clFinish( queue ); \ - } \ - if( error == CL_SUCCESS ) /* Only run multiples test if single test passed */ \ - { \ - name##Action action; \ - log_info( "-- Testing " #name " (waiting on 2 events)...\n" ); \ - if( ( error = test_waitlist( deviceID, context, queue, &action, true ) ) != CL_SUCCESS ) \ - retVal++; \ - clFinish( queue ); \ +#define TEST_ACTION(name) \ + { \ + name##Action action; \ + log_info("-- Testing " #name " (waiting on 1 event)...\n"); \ + if ((error = test_waitlist(deviceID, context, queue, &action, false)) \ + != CL_SUCCESS) \ + retVal++; \ + clFinish(queue); \ + } \ + if (error \ + == CL_SUCCESS) /* Only run multiples test if single test passed */ \ + { \ + name##Action action; \ + log_info("-- Testing " #name " (waiting on 2 events)...\n"); \ + if ((error = test_waitlist(deviceID, context, queue, &action, true)) \ + != CL_SUCCESS) \ + retVal++; \ + clFinish(queue); \ } -int test_waitlists( cl_device_id deviceID, cl_context context, 
cl_command_queue oldQueue, int num_elements ) +int test_waitlists(cl_device_id deviceID, cl_context context, + cl_command_queue oldQueue, int num_elements) { cl_int error; int retVal = 0; cl_command_queue_properties props = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; - if( !checkDeviceForQueueSupport( deviceID, props ) ) + if (!checkDeviceForQueueSupport(deviceID, props)) { - log_info( "WARNING: Device does not support out-of-order exec mode; skipping test.\n" ); + log_info("WARNING: Device does not support out-of-order exec mode; " + "skipping test.\n"); return 0; } - clCommandQueueWrapper queue = clCreateCommandQueue( context, deviceID, props, &error ); + clCommandQueueWrapper queue = + clCreateCommandQueue(context, deviceID, props, &error); test_error(error, "Unable to create out-of-order queue"); - log_info( "\n" ); + log_info("\n"); - TEST_ACTION( NDRangeKernel ) + TEST_ACTION(NDRangeKernel) - TEST_ACTION( ReadBuffer ) - TEST_ACTION( WriteBuffer ) - TEST_ACTION( MapBuffer ) - TEST_ACTION( UnmapBuffer ) + TEST_ACTION(ReadBuffer) + TEST_ACTION(WriteBuffer) + TEST_ACTION(MapBuffer) + TEST_ACTION(UnmapBuffer) - if( checkForImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) + if (checkForImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) { - log_info( "\nNote: device does not support images. Skipping remainder of waitlist tests...\n" ); + log_info("\nNote: device does not support images. Skipping remainder " + "of waitlist tests...\n"); } else { - TEST_ACTION( ReadImage2D ) - TEST_ACTION( WriteImage2D ) - TEST_ACTION( CopyImage2Dto2D ) - TEST_ACTION( Copy2DImageToBuffer ) - TEST_ACTION( CopyBufferTo2DImage ) - TEST_ACTION( MapImage ) - - if( checkFor3DImageSupport( deviceID ) == CL_IMAGE_FORMAT_NOT_SUPPORTED ) - log_info("Device does not support 3D images. 
Skipping remainder of waitlist tests...\n"); + TEST_ACTION(ReadImage2D) + TEST_ACTION(WriteImage2D) + TEST_ACTION(CopyImage2Dto2D) + TEST_ACTION(Copy2DImageToBuffer) + TEST_ACTION(CopyBufferTo2DImage) + TEST_ACTION(MapImage) + + if (checkFor3DImageSupport(deviceID) == CL_IMAGE_FORMAT_NOT_SUPPORTED) + log_info("Device does not support 3D images. Skipping remainder of " + "waitlist tests...\n"); else { - TEST_ACTION( ReadImage3D ) - TEST_ACTION( WriteImage3D ) - TEST_ACTION( CopyImage2Dto3D ) - TEST_ACTION( CopyImage3Dto2D ) - TEST_ACTION( CopyImage3Dto3D ) - TEST_ACTION( Copy3DImageToBuffer ) - TEST_ACTION( CopyBufferTo3DImage ) + TEST_ACTION(ReadImage3D) + TEST_ACTION(WriteImage3D) + TEST_ACTION(CopyImage2Dto3D) + TEST_ACTION(CopyImage3Dto2D) + TEST_ACTION(CopyImage3Dto3D) + TEST_ACTION(Copy3DImageToBuffer) + TEST_ACTION(CopyBufferTo3DImage) } } return retVal; } - -- cgit v1.2.3 From 5d5bffba13e4187f1378a8d3f8db6d5662cf1dc2 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 13 Sep 2022 14:48:54 +0100 Subject: [NFC] Declare format tables as const (#1493) Without const, these variables would be flagged up by `-Wunused-variable`. Drop `struct` from the declarations as that is not needed in C++. Signed-off-by: Sven van Haastregt --- test_conformance/gl/common.h | 32 +++++++++++----------- test_conformance/gl/test_images_getinfo_common.cpp | 17 +++++++----- test_conformance/gl/test_images_read_common.cpp | 14 +++++----- test_conformance/gl/test_images_write_common.cpp | 5 ++-- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/test_conformance/gl/common.h b/test_conformance/gl/common.h index aaa6a5e7..d8587cf0 100644 --- a/test_conformance/gl/common.h +++ b/test_conformance/gl/common.h @@ -32,12 +32,8 @@ struct format { }; // These are the typically tested formats. -// TODO: These variables should be made const; until then, suppress unused -// variable warnings as not every translation unit including this header uses -// all variables. 
-#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wunused-variable" -static struct format common_formats[] = { +// clang-format off +static const format common_formats[] = { #ifdef __APPLE__ { GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, kUChar }, { GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, kUChar }, @@ -57,26 +53,30 @@ static struct format common_formats[] = { }; #ifdef GL_VERSION_3_2 -static struct format depth_formats[] = { +static const format depth_formats[] = { { GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, kUShort }, { GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, kFloat }, { GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, kUInt }, { GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, kFloat }, }; #endif -#pragma GCC diagnostic pop +// clang-format on int test_images_write_common(cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t* sizes, size_t nsizes ); + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, size_t ntargets, + sizevec_t *sizes, size_t nsizes); -int test_images_read_common( cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ); +int test_images_read_common(cl_device_id device, cl_context context, + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, size_t ntargets, + sizevec_t *sizes, size_t nsizes); -int test_images_get_info_common( cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ); +int test_images_get_info_common(cl_device_id device, cl_context context, + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, + size_t ntargets, 
sizevec_t *sizes, + size_t nsizes); int is_rgb_101010_supported( cl_context context, GLenum gl_target ); diff --git a/test_conformance/gl/test_images_getinfo_common.cpp b/test_conformance/gl/test_images_getinfo_common.cpp index 345b5950..2322c269 100644 --- a/test_conformance/gl/test_images_getinfo_common.cpp +++ b/test_conformance/gl/test_images_getinfo_common.cpp @@ -86,10 +86,11 @@ static int test_image_info( cl_context context, cl_command_queue queue, return CheckGLObjectInfo(streams[0], object_type, glTexture, glTarget, 0); } -static int test_image_format_get_info( - cl_context context, cl_command_queue queue, - size_t width, size_t height, size_t depth, - GLenum target, struct format* fmt, MTdata data) +static int test_image_format_get_info(cl_context context, + cl_command_queue queue, size_t width, + size_t height, size_t depth, + GLenum target, const format *fmt, + MTdata data) { int error = 0; @@ -197,9 +198,11 @@ static int test_image_format_get_info( &actualType, (void **)&outBuffer ); } -int test_images_get_info_common( cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ) +int test_images_get_info_common(cl_device_id device, cl_context context, + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, + size_t ntargets, sizevec_t *sizes, + size_t nsizes) { int error = 0; RandomSeed seed(gRandomSeed); diff --git a/test_conformance/gl/test_images_read_common.cpp b/test_conformance/gl/test_images_read_common.cpp index 112c7891..fe2a529b 100644 --- a/test_conformance/gl/test_images_read_common.cpp +++ b/test_conformance/gl/test_images_read_common.cpp @@ -386,10 +386,9 @@ static int test_image_read( cl_context context, cl_command_queue queue, width, height, depth, sampleNum, outFormat, outType, outResultBuffer ); } -static int test_image_format_read( - cl_context context, cl_command_queue queue, - size_t 
width, size_t height, size_t depth, - GLenum target, struct format* fmt, MTdata data) +static int test_image_format_read(cl_context context, cl_command_queue queue, + size_t width, size_t height, size_t depth, + GLenum target, const format *fmt, MTdata data) { int error = 0; @@ -645,9 +644,10 @@ static int test_image_format_read( } } -int test_images_read_common( cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t *sizes, size_t nsizes ) +int test_images_read_common(cl_device_id device, cl_context context, + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, size_t ntargets, + sizevec_t *sizes, size_t nsizes) { int error = 0; RandomSeed seed(gRandomSeed); diff --git a/test_conformance/gl/test_images_write_common.cpp b/test_conformance/gl/test_images_write_common.cpp index 15bad520..0dba83bb 100644 --- a/test_conformance/gl/test_images_write_common.cpp +++ b/test_conformance/gl/test_images_write_common.cpp @@ -660,8 +660,9 @@ static int test_image_format_write( cl_context context, cl_command_queue queue, // combination. 
int test_images_write_common(cl_device_id device, cl_context context, - cl_command_queue queue, struct format* formats, size_t nformats, - GLenum *targets, size_t ntargets, sizevec_t* sizes, size_t nsizes ) + cl_command_queue queue, const format *formats, + size_t nformats, GLenum *targets, size_t ntargets, + sizevec_t *sizes, size_t nsizes) { int err = 0; int error = 0; -- cgit v1.2.3 From d42b3dcfb6ea192b03cc37501f5e1c0e692303be Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 13 Sep 2022 17:49:09 +0100 Subject: [NFC] Fix typo (enevt_type -> event_type) (#1498) Signed-off-by: Stuart Brady --- test_conformance/events/test_callbacks.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_conformance/events/test_callbacks.cpp b/test_conformance/events/test_callbacks.cpp index 911298a5..04481dec 100644 --- a/test_conformance/events/test_callbacks.cpp +++ b/test_conformance/events/test_callbacks.cpp @@ -55,7 +55,7 @@ commandStatus, void * userData ) /* use struct as call back para */ typedef struct { - cl_int enevt_type; + cl_int event_type; int index; } CALL_BACK_USER_DATA; @@ -67,7 +67,7 @@ void CL_CALLBACK single_event_callback_function_flags(cl_event event, CALL_BACK_USER_DATA *pdata = static_cast(userData); log_info("\tEvent callback %d of type %d triggered\n", pdata->index, - pdata->enevt_type); + pdata->event_type); sCallbackTriggered_flag[pdata->index] = true; } @@ -95,7 +95,7 @@ int test_callback_event_single(cl_device_id device, cl_context context, CALL_BACK_USER_DATA user_data[EVENT_CALLBACK_TYPE_TOTAL]; for (int i = 0; i < EVENT_CALLBACK_TYPE_TOTAL; i++) { - user_data[i].enevt_type = event_callback_types[i]; + user_data[i].event_type = event_callback_types[i]; user_data[i].index = i; error = clSetEventCallback(actualEvent, event_callback_types[i], single_event_callback_function_flags, -- cgit v1.2.3 From 426097cf7c2e0e4b6c659bd0b744e6f51e61805d Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 13 Sep 2022 
10:50:25 -0600 Subject: gles: Limit variable definition to the same scope as usage (#1495) Fix unused-variable errors by limiting variable definition to the case that would use it --- test_conformance/gles/main.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_conformance/gles/main.cpp b/test_conformance/gles/main.cpp index 644fa63c..60e020d8 100644 --- a/test_conformance/gles/main.cpp +++ b/test_conformance/gles/main.cpp @@ -320,8 +320,10 @@ int main(int argc, const char *argv[]) goto cleanup; } +#ifdef GLES3 int argc_ = (first_32_testname) ? 1 + (argc - first_32_testname) : argc; const char** argv_ = (first_32_testname) ? &argv[first_32_testname-1] : argv; +#endif // Execute the tests. for( size_t i = 0; i < numDevices; i++ ) { -- cgit v1.2.3 From c0a10f4e12c1a4866a37449c5697a2f4c5e82e25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 13 Sep 2022 17:58:24 +0100 Subject: Tests for cl-ext-image-from-buffer and cl-ext-image-requirements-info (#1438) * Add CTS tests for cl_ext_image_requirements_info Change-Id: I20c1c77ff5ba88eb475801bafba30ef9caf82601 * Add CTS tests for cl_ext_image_from_buffer Change-Id: Ic30429d77a1317d0fea7d9ecc6d603267fa6602f * Fixes for image_from_buffer and image_requirements extension * Use CL_MEM_READ_WRITE flag when creating images that support CL_MEM_KERNEL_READ_AND_WRITE (#1447) * format fixes Change-Id: I04d69720730440cb61e64fed2cb5065b2ff8bf90 Co-authored-by: Oualid Khelifi Co-authored-by: oramirez Co-authored-by: Sreelakshmi Haridas Maruthur --- .../images/kernel_read_write/CMakeLists.txt | 2 + test_conformance/images/kernel_read_write/main.cpp | 143 ++- .../kernel_read_write/test_cl_ext_image_buffer.hpp | 124 +++ .../test_cl_ext_image_from_buffer.cpp | 1007 ++++++++++++++++++++ .../test_cl_ext_image_requirements_info.cpp | 482 ++++++++++ 5 files changed, 1753 insertions(+), 5 deletions(-) create mode 100644 test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp create mode 100644 
test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp create mode 100644 test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt index 54449875..6eb5dc7f 100644 --- a/test_conformance/images/kernel_read_write/CMakeLists.txt +++ b/test_conformance/images/kernel_read_write/CMakeLists.txt @@ -14,6 +14,8 @@ set(${MODULE_NAME}_SOURCES test_write_1D_array.cpp test_write_2D_array.cpp test_write_3D.cpp + test_cl_ext_image_requirements_info.cpp + test_cl_ext_image_from_buffer.cpp ../common.cpp ) diff --git a/test_conformance/images/kernel_read_write/main.cpp b/test_conformance/images/kernel_read_write/main.cpp index 31dceb33..0a93a974 100644 --- a/test_conformance/images/kernel_read_write/main.cpp +++ b/test_conformance/images/kernel_read_write/main.cpp @@ -53,6 +53,43 @@ static void printUsage( const char *execName ); extern int test_image_set( cl_device_id device, cl_context context, cl_command_queue queue, test_format_set_fn formatTestFn, cl_mem_object_type imageType ); +extern int cl_image_requirements_size_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int cl_image_requirements_size_ext_consistency(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int clGetImageRequirementsInfoEXT_negative(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int cl_image_requirements_max_val_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int cl_image_requirements_max_val_ext_positive(cl_device_id device, + cl_context context, + cl_command_queue queue); + +extern int image2d_from_buffer_positive(cl_device_id device, cl_context context, + cl_command_queue queue); +extern int memInfo_image_from_buffer_positive(cl_device_id device, + cl_context context, + cl_command_queue 
queue); +extern int imageInfo_image_from_buffer_positive(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int image_from_buffer_alignment_negative(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int image_from_small_buffer_negative(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int image_from_buffer_fill_positive(cl_device_id device, + cl_context context, + cl_command_queue queue); +extern int image_from_buffer_read_positive(cl_device_id device, + cl_context context, + cl_command_queue queue); + /** read_write images only support sampler-less read buildt-ins which require special settings * for some global parameters. This pair of functions temporarily overwrite those global parameters * and then recover them after completing a read_write test. @@ -246,12 +283,108 @@ int test_2Darray(cl_device_id device, cl_context context, cl_command_queue queue return doTest( device, context, queue, CL_MEM_OBJECT_IMAGE2D_ARRAY ); } +int test_cl_image_requirements_size_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return cl_image_requirements_size_ext_negative(device, context, queue); +} +int test_cl_image_requirements_size_ext_consistency(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return cl_image_requirements_size_ext_consistency(device, context, queue); +} +int test_clGetImageRequirementsInfoEXT_negative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return clGetImageRequirementsInfoEXT_negative(device, context, queue); +} +int test_cl_image_requirements_max_val_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return cl_image_requirements_max_val_ext_negative(device, context, queue); +} +int test_cl_image_requirements_max_val_ext_positive(cl_device_id device, + cl_context 
context, + cl_command_queue queue, + int num_elements) +{ + return cl_image_requirements_max_val_ext_positive(device, context, queue); +} + +int test_image2d_from_buffer_positive(cl_device_id device, cl_context context, + cl_command_queue queue, int num_elements) +{ + return image2d_from_buffer_positive(device, context, queue); +} +int test_memInfo_image_from_buffer_positive(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return memInfo_image_from_buffer_positive(device, context, queue); +} +int test_imageInfo_image_from_buffer_positive(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return imageInfo_image_from_buffer_positive(device, context, queue); +} +int test_image_from_buffer_alignment_negative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return image_from_buffer_alignment_negative(device, context, queue); +} +int test_image_from_small_buffer_negative(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return image_from_small_buffer_negative(device, context, queue); +} +int test_image_from_buffer_fill_positive(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return image_from_buffer_fill_positive(device, context, queue); +} +int test_image_from_buffer_read_positive(cl_device_id device, + cl_context context, + cl_command_queue queue, + int num_elements) +{ + return image_from_buffer_read_positive(device, context, queue); +} + test_definition test_list[] = { - ADD_TEST( 1D ), - ADD_TEST( 2D ), - ADD_TEST( 3D ), - ADD_TEST( 1Darray ), - ADD_TEST( 2Darray ), + ADD_TEST(1D), + ADD_TEST(2D), + ADD_TEST(3D), + ADD_TEST(1Darray), + ADD_TEST(2Darray), + ADD_TEST_VERSION(cl_image_requirements_size_ext_negative, Version(3, 0)), + ADD_TEST_VERSION(cl_image_requirements_size_ext_consistency, Version(3, 0)), + 
ADD_TEST_VERSION(clGetImageRequirementsInfoEXT_negative, Version(3, 0)), + ADD_TEST_VERSION(cl_image_requirements_max_val_ext_negative, Version(3, 0)), + ADD_TEST_VERSION(cl_image_requirements_max_val_ext_positive, Version(3, 0)), + ADD_TEST_VERSION(image2d_from_buffer_positive, Version(3, 0)), + ADD_TEST_VERSION(memInfo_image_from_buffer_positive, Version(3, 0)), + ADD_TEST_VERSION(imageInfo_image_from_buffer_positive, Version(3, 0)), + ADD_TEST_VERSION(image_from_buffer_alignment_negative, Version(3, 0)), + ADD_TEST_VERSION(image_from_small_buffer_negative, Version(3, 0)), + ADD_TEST_VERSION(image_from_buffer_fill_positive, Version(3, 0)), + ADD_TEST_VERSION(image_from_buffer_read_positive, Version(3, 0)), }; const int test_num = ARRAY_SIZE( test_list ); diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp new file mode 100644 index 00000000..c6646330 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_buffer.hpp @@ -0,0 +1,124 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#ifndef _TEST_CL_EXT_IMAGE_BUFFER +#define _TEST_CL_EXT_IMAGE_BUFFER + +#define TEST_IMAGE_SIZE 20 + +#define GET_EXTENSION_FUNC(platform, function_name) \ + function_name##_fn function_name = reinterpret_cast( \ + clGetExtensionFunctionAddressForPlatform(platform, #function_name)); \ + if (function_name == nullptr) \ + { \ + return TEST_FAIL; \ + } \ + do \ + { \ + } while (false) + +static inline size_t aligned_size(size_t size, size_t alignment) +{ + return (size + alignment - 1) & ~(alignment - 1); +} + +static inline void* aligned_ptr(void* ptr, size_t alignment) +{ + return (void*)(((uintptr_t)ptr + alignment - 1) & ~(alignment - 1)); +} + +static inline size_t get_format_size(cl_context context, + cl_image_format* format, + cl_mem_object_type imageType, + cl_mem_flags flags) +{ + cl_image_desc image_desc = { 0 }; + image_desc.image_type = imageType; + + /* Size 1 only to query element size */ + image_desc.image_width = 1; + if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType + && CL_MEM_OBJECT_IMAGE1D != imageType) + { + image_desc.image_height = 1; + } + if (CL_MEM_OBJECT_IMAGE3D == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + image_desc.image_depth = 1; + } + if (CL_MEM_OBJECT_IMAGE1D_ARRAY == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + image_desc.image_array_size = 1; + } + + cl_int error = 0; + cl_mem buffer; + if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER) + { + buffer = clCreateBuffer(context, flags, + get_pixel_size(format) * image_desc.image_width, + NULL, &error); + test_error(error, "Unable to create buffer"); + + image_desc.buffer = buffer; + } + + cl_mem image = + clCreateImage(context, flags, format, &image_desc, nullptr, &error); + test_error(error, "Unable to create image"); + + size_t element_size = 0; + error = clGetImageInfo(image, CL_IMAGE_ELEMENT_SIZE, sizeof(element_size), + &element_size, nullptr); + test_error(error, "Error clGetImageInfo"); + + error = clReleaseMemObject(image); + 
test_error(error, "Unable to release image"); + + if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER) + { + error = clReleaseMemObject(buffer); + test_error(error, "Unable to release buffer"); + } + + return element_size; +} + +static inline void image_desc_init(cl_image_desc* desc, + cl_mem_object_type imageType) +{ + desc->image_type = imageType; + desc->image_width = TEST_IMAGE_SIZE; + if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType + && CL_MEM_OBJECT_IMAGE1D != imageType) + { + desc->image_height = TEST_IMAGE_SIZE; + } + if (CL_MEM_OBJECT_IMAGE3D == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + desc->image_depth = TEST_IMAGE_SIZE; + } + if (CL_MEM_OBJECT_IMAGE1D_ARRAY == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + desc->image_array_size = TEST_IMAGE_SIZE; + } +} + +#endif /* _TEST_CL_EXT_IMAGE_BUFFER */ \ No newline at end of file diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp new file mode 100644 index 00000000..1b3b04b7 --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp @@ -0,0 +1,1007 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// + +#include "../testBase.h" +#include "../common.h" +#include "test_cl_ext_image_buffer.hpp" + +static int get_image_requirement_alignment( + cl_device_id device, cl_context context, cl_mem_flags flags, + const cl_image_format* image_format, const cl_image_desc* image_desc, + size_t* row_pitch_alignment, size_t* slice_pitch_alignment, + size_t* base_address_alignment) +{ + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + cl_int err = CL_SUCCESS; + if (nullptr != row_pitch_alignment) + { + err = clGetImageRequirementsInfoEXT( + context, nullptr, flags, image_format, image_desc, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(*row_pitch_alignment), row_pitch_alignment, nullptr); + test_error(err, "Error getting alignment"); + } + + if (nullptr != slice_pitch_alignment && CL_SUCCESS == err) + { + err = clGetImageRequirementsInfoEXT( + context, nullptr, flags, image_format, image_desc, + CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT, + sizeof(*slice_pitch_alignment), slice_pitch_alignment, nullptr); + test_error(err, "Error getting alignment"); + } + + if (nullptr != base_address_alignment && CL_SUCCESS == err) + { + err = clGetImageRequirementsInfoEXT( + context, nullptr, flags, image_format, image_desc, + CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT, + sizeof(*base_address_alignment), base_address_alignment, nullptr); + test_error(err, "Error getting alignment"); + } + + return TEST_PASS; +} + +/** + * Consistency with alignment requirements as returned by + * cl_khr_image2d_from_buffer Check that the returned values for + * CL_DEVICE_IMAGE_PITCH_ALIGNMENT and CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT + * are correct. 
+ */ +int image2d_from_buffer_positive(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_khr_image2d_from_buffer")) + { + printf("Extension cl_khr_image2d_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + cl_uint row_pitch_alignment_2d = 0; + cl_int err = + clGetDeviceInfo(device, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, + sizeof(row_pitch_alignment_2d), + &row_pitch_alignment_2d, nullptr); + test_error(err, "Error clGetDeviceInfo"); + + cl_uint base_address_alignment_2d = 0; + err = + clGetDeviceInfo(device, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, + sizeof(base_address_alignment_2d), + &base_address_alignment_2d, nullptr); + test_error(err, "Error clGetDeviceInfo"); + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? 
CL_MEM_READ_WRITE + : flag; + + size_t row_pitch_alignment = 0; + size_t base_address_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, 0, &format, &image_desc, + &row_pitch_alignment, nullptr, &base_address_alignment); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t element_size = + get_format_size(context, &format, imageType, flag); + + /* Alignements in pixels vs bytes */ + if (base_address_alignment + > base_address_alignment_2d * element_size) + { + test_fail("Unexpected base_address_alignment"); + } + + if (row_pitch_alignment > row_pitch_alignment_2d * element_size) + { + test_fail("Unexpected row_pitch_alignment"); + } + } + } + } + + return TEST_PASS; +} + +/** + * Test clGetMemObjectInfo + * Check that CL_MEM_ASSOCIATED_MEMOBJECT correctly returns the buffer that was + * used. + */ +int memInfo_image_from_buffer_positive(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + 
cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? CL_MEM_READ_WRITE + : flag; + + size_t row_pitch_alignment = 0; + size_t slice_pitch_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, 0, &format, &image_desc, + &row_pitch_alignment, &slice_pitch_alignment, nullptr); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t element_size = + get_format_size(context, &format, imageType, flag); + + const size_t row_pitch = aligned_size( + TEST_IMAGE_SIZE * element_size, row_pitch_alignment); + const size_t slice_pitch = aligned_size( + row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment); + + const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE; + + cl_int err = CL_SUCCESS; + cl_mem buffer = + clCreateBuffer(context, flag, buffer_size, nullptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = buffer; + + cl_mem image_buffer = clCreateImage(context, flag, &format, + &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + cl_mem returned_buffer; + err = clGetMemObjectInfo( + image_buffer, CL_MEM_ASSOCIATED_MEMOBJECT, + sizeof(returned_buffer), &returned_buffer, nullptr); + test_error(err, "Error clGetMemObjectInfo"); + + if (returned_buffer != buffer) + { + test_fail("Unexpected CL_MEM_ASSOCIATED_MEMOBJECT buffer"); + } + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + + err = clReleaseMemObject(image_buffer); + test_error(err, "Unable to release image"); + } + } + } + + return TEST_PASS; +} + +/** + * Test clGetImageInfo + * Check that the returned values for CL_IMAGE_ROW_PITCH and + * CL_IMAGE_SLICE_PITCH are correct. 
+ */ +int imageInfo_image_from_buffer_positive(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? 
CL_MEM_READ_WRITE + : flag; + + size_t row_pitch_alignment = 0; + size_t slice_pitch_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, 0, &format, &image_desc, + &row_pitch_alignment, &slice_pitch_alignment, nullptr); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t element_size = + get_format_size(context, &format, imageType, flag); + + const size_t row_pitch = aligned_size( + TEST_IMAGE_SIZE * element_size, row_pitch_alignment); + const size_t slice_pitch = aligned_size( + row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment); + + const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE; + + cl_int err = CL_SUCCESS; + cl_mem buffer = + clCreateBuffer(context, flag, buffer_size, nullptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = buffer; + + if (imageType == CL_MEM_OBJECT_IMAGE2D + || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + } + else if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + image_desc.image_slice_pitch = slice_pitch; + } + + cl_mem image_buffer = clCreateImage(context, flag, &format, + &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY + || imageType == CL_MEM_OBJECT_IMAGE2D + || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + size_t returned_row_pitch = 0; + err = clGetImageInfo(image_buffer, CL_IMAGE_ROW_PITCH, + sizeof(returned_row_pitch), + &returned_row_pitch, nullptr); + test_error(err, "Error clGetImageInfo"); + + if (returned_row_pitch != row_pitch) + { + test_fail( + "Unexpected row pitch " + "CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT"); + } + } + + if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + size_t returned_slice_pitch = 0; + err = clGetImageInfo(image_buffer, 
CL_IMAGE_SLICE_PITCH, + sizeof(returned_slice_pitch), + &returned_slice_pitch, nullptr); + test_error(err, "Error clGetImageInfo"); + + if (returned_slice_pitch != slice_pitch) + { + test_fail( + "Unexpected row pitch " + "CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT"); + } + } + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + + err = clReleaseMemObject(image_buffer); + test_error(err, "Unable to release image"); + } + } + } + + return TEST_PASS; +} + +/** + * Negative testing for clCreateImage and wrong alignment + * - Create an image from a buffer with invalid row pitch (not a multiple of + * required alignment) and check that CL_INVALID_IMAGE_DESCRIPTOR is returned. + * - Create an image from a buffer with invalid slice pitch (not a multiple of + * required alignment) and check that CL_INVALID_IMAGE_DESCRIPTOR is returned. + * - Create an image from a buffer with invalid base address alignment (not a + * multiple of required alignment) and check that CL_INVALID_IMAGE_DESCRIPTOR is + * returned + */ +int image_from_buffer_alignment_negative(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS 
+ != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? CL_MEM_READ_WRITE + : flag; + + size_t row_pitch_alignment = 0; + size_t slice_pitch_alignment = 0; + size_t base_address_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, 0, &format, &image_desc, + &row_pitch_alignment, &slice_pitch_alignment, + &base_address_alignment); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t element_size = + get_format_size(context, &format, imageType, flag); + + const size_t row_pitch = aligned_size( + TEST_IMAGE_SIZE * element_size, row_pitch_alignment); + const size_t slice_pitch = aligned_size( + row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment); + + const size_t buffer_size = (slice_pitch + 1) + * TEST_IMAGE_SIZE; /* For bigger row/slice pitch */ + + cl_int err = CL_SUCCESS; + cl_mem buffer = + clCreateBuffer(context, flag, buffer_size, nullptr, &err); + test_error(err, "Unable to create buffer"); + + /* Test Row pitch images */ + if (imageType == CL_MEM_OBJECT_IMAGE2D + || imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + image_desc.buffer = buffer; + image_desc.image_row_pitch = + row_pitch + 1; /* wrong row pitch */ + + clCreateImage(context, flag, &format, &image_desc, nullptr, + &err); + test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Unexpected clCreateImage return"); + } + + /* Test Slice pitch images */ + if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + image_desc.buffer = buffer; + image_desc.image_row_pitch = row_pitch; + image_desc.image_slice_pitch = + slice_pitch + 1; /* wrong slice pitch */ + + 
clCreateImage(context, flag, &format, &image_desc, nullptr, + &err); + test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Unexpected clCreateImage return"); + } + + /* Test buffer from host ptr to test base address alignment */ + const size_t aligned_buffer_size = + aligned_size(buffer_size, base_address_alignment); + /* Create buffer with host ptr and additional size for the wrong + * alignment */ + void* const host_ptr = + malloc(aligned_buffer_size + base_address_alignment); + void* non_aligned_host_ptr = + (void*)((char*)(aligned_ptr(host_ptr, + base_address_alignment)) + + 1); /* wrong alignment */ + + cl_mem buffer_host = clCreateBuffer( + context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE, + buffer_size, non_aligned_host_ptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = buffer_host; + + clCreateImage(context, flag, &format, &image_desc, nullptr, + &err); + test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Unexpected clCreateImage return"); + + free(host_ptr); + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + + err = clReleaseMemObject(buffer_host); + test_error(err, "Unable to release buffer"); + } + } + } + + return TEST_PASS; +} + +/** + * Negative testing for clCreateImage (buffer size). 
+ * Create a buffer too small and check that image creation from that buffer is + * rejected + */ +int image_from_small_buffer_negative(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE3D, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? 
CL_MEM_READ_WRITE + : flag; + + /* Invalid buffer size */ + cl_int err; + cl_mem buffer = clCreateBuffer( + context, flag, TEST_IMAGE_SIZE / 2, nullptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = buffer; + + clCreateImage(context, flag, &format, &image_desc, nullptr, + &err); + test_failure_error(err, CL_INVALID_MEM_OBJECT, + "Unexpected clCreateImage return"); + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + } + } + } + + return TEST_PASS; +} + +static int image_from_buffer_fill_check(cl_command_queue queue, cl_mem image, + size_t* region, size_t element_size, + char pattern) +{ + /* read the image from buffer and check the pattern */ + const size_t image_size = region[0] * region[1] * region[2] * element_size; + size_t origin[3] = { 0, 0, 0 }; + std::vector read_buffer(image_size); + + cl_int error = + clEnqueueReadImage(queue, image, CL_BLOCKING, origin, region, 0, 0, + read_buffer.data(), 0, nullptr, nullptr); + test_error(error, "Error clEnqueueReadImage"); + + for (size_t line = 0; line < region[0]; line++) + { + for (size_t row = 0; row < region[1]; row++) + { + for (size_t depth = 0; depth < region[2]; depth++) + { + for (size_t elmt = 0; elmt < element_size; elmt++) + { + size_t index = line * row * depth * elmt; + + if (read_buffer[index] != pattern) + { + test_fail("Image pattern check failed"); + } + } + } + } + } + + return TEST_PASS; +} + +/** + * Use fill buffer to fill the image from buffer + */ +int image_from_buffer_fill_positive(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + 
CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? CL_MEM_READ_WRITE + : flag; + + size_t row_pitch_alignment = 0; + size_t slice_pitch_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, 0, &format, &image_desc, + &row_pitch_alignment, &slice_pitch_alignment, nullptr); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t element_size = + get_format_size(context, &format, imageType, flag); + + const size_t row_pitch = aligned_size( + TEST_IMAGE_SIZE * element_size, row_pitch_alignment); + const size_t slice_pitch = aligned_size( + row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment); + + const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE; + + cl_int err = CL_SUCCESS; + cl_mem buffer = + clCreateBuffer(context, flag, buffer_size, nullptr, &err); + test_error(err, "Unable to create buffer"); + + /* fill the buffer with a pattern */ + const char pattern = 0x55; + err = clEnqueueFillBuffer(queue, buffer, &pattern, + sizeof(pattern), 0, buffer_size, 0, + nullptr, nullptr); + test_error(err, "Error clEnqueueFillBuffer"); + + err = clFinish(queue); + test_error(err, "Error clFinish"); + + cl_mem image1d_buffer; + if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER) + { + image1d_buffer = 
clCreateBuffer(context, flag, buffer_size, + nullptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = image1d_buffer; + } + + cl_mem image = clCreateImage(context, flag, &format, + &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + /* Check the image from buffer */ + image_desc.buffer = buffer; + + if (imageType == CL_MEM_OBJECT_IMAGE2D + || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + } + else if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + image_desc.image_slice_pitch = slice_pitch; + } + + cl_mem image_from_buffer = clCreateImage( + context, flag, &format, &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { 1, 1, 1 }; + + region[0] = TEST_IMAGE_SIZE; + if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType + && CL_MEM_OBJECT_IMAGE1D != imageType) + { + region[1] = TEST_IMAGE_SIZE; + } + if (CL_MEM_OBJECT_IMAGE3D == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + region[2] = TEST_IMAGE_SIZE; + } + + /* Check the copy of the image from buffer */ + err = + clEnqueueCopyImage(queue, image_from_buffer, image, origin, + origin, region, 0, nullptr, nullptr); + test_error(err, "Error clEnqueueCopyImage"); + + err = clFinish(queue); + test_error(err, "Error clFinish"); + + int fill_error = image_from_buffer_fill_check( + queue, image_from_buffer, region, element_size, pattern); + if (TEST_PASS != fill_error) + { + return fill_error; + } + + fill_error = image_from_buffer_fill_check( + queue, image, region, element_size, pattern); + if (TEST_PASS != fill_error) + { + return fill_error; + } + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + + err = clReleaseMemObject(image); + test_error(err, "Unable to release image"); + + err = clReleaseMemObject(image_from_buffer); 
+ test_error(err, "Unable to release image"); + + if (imageType == CL_MEM_OBJECT_IMAGE1D_BUFFER) + { + err = clReleaseMemObject(image1d_buffer); + test_error(err, "Unable to release image"); + } + } + } + } + + return TEST_PASS; +} + +static int image_from_buffer_read_check(cl_command_queue queue, cl_mem buffer, + const size_t buffer_size, + size_t* region, size_t element_size, + char pattern, size_t row_pitch, + size_t slice_pitch) +{ + /* read the buffer and check the pattern */ + std::vector host_buffer(buffer_size); + char* host_ptr = host_buffer.data(); + char* host_ptr_slice = host_ptr; + + cl_int error = + clEnqueueReadBuffer(queue, buffer, CL_BLOCKING, 0, buffer_size, + host_buffer.data(), 0, nullptr, nullptr); + test_error(error, "Error clEnqueueReadBuffer"); + + for (size_t k = 0; k < region[2]; k++) + { + for (size_t i = 0; i < region[1]; i++) + { + for (size_t j = 0; j < region[0] * element_size; j++) + { + if (host_ptr[j] != pattern) + { + test_fail("Image pattern check failed"); + } + } + host_ptr = host_ptr + row_pitch; + } + host_ptr_slice = host_ptr_slice + slice_pitch; + host_ptr = host_ptr_slice; + } + + return TEST_PASS; +} + +/** + * Use fill image to fill the buffer that was used to create the image + */ +int image_from_buffer_read_positive(cl_device_id device, cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + for (auto imageType : imageTypes) + { + cl_image_desc image_desc = { 0 }; + 
image_desc_init(&image_desc, imageType); + + /* Non normalized format so we can read it back directly from + * clEnqueueFillImage */ + cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT8 }; + const char pattern = 0x55; + + const size_t element_size = + get_format_size(context, &format, imageType, CL_MEM_READ_WRITE); + + size_t row_pitch_alignment = 0; + size_t slice_pitch_alignment = 0; + + int get_error = get_image_requirement_alignment( + device, context, CL_MEM_READ_WRITE, &format, &image_desc, + &row_pitch_alignment, &slice_pitch_alignment, nullptr); + if (TEST_PASS != get_error) + { + return get_error; + } + + const size_t row_pitch = + aligned_size(TEST_IMAGE_SIZE * element_size, row_pitch_alignment); + const size_t slice_pitch = + aligned_size(row_pitch * TEST_IMAGE_SIZE, slice_pitch_alignment); + + const size_t buffer_size = slice_pitch * TEST_IMAGE_SIZE; + + cl_int err = CL_SUCCESS; + cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_WRITE, buffer_size, + nullptr, &err); + test_error(err, "Unable to create buffer"); + + /* Check the image from buffer */ + image_desc.buffer = buffer; + + if (imageType == CL_MEM_OBJECT_IMAGE2D + || imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + } + else if (imageType == CL_MEM_OBJECT_IMAGE3D + || imageType == CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + image_desc.image_row_pitch = row_pitch; + image_desc.image_slice_pitch = slice_pitch; + } + + cl_mem image = clCreateImage(context, CL_MEM_READ_WRITE, &format, + &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + size_t origin[3] = { 0, 0, 0 }; + size_t region[3] = { 1, 1, 1 }; + + region[0] = TEST_IMAGE_SIZE; + if (CL_MEM_OBJECT_IMAGE1D_BUFFER != imageType + && CL_MEM_OBJECT_IMAGE1D != imageType) + { + region[1] = TEST_IMAGE_SIZE; + } + if (CL_MEM_OBJECT_IMAGE3D == imageType + || CL_MEM_OBJECT_IMAGE2D_ARRAY == imageType) + { + region[2] = TEST_IMAGE_SIZE; + } + + /* fill the image with a pattern */ + cl_uint 
fill_color[4] = { pattern, pattern, pattern, pattern }; + err = clEnqueueFillImage(queue, image, fill_color, origin, region, 0, + nullptr, nullptr); + test_error(err, "Error clEnqueueFillImage"); + + err = clFinish(queue); + test_error(err, "Error clFinish"); + + int read_error = image_from_buffer_read_check( + queue, buffer, buffer_size, region, element_size, pattern, + (imageType == CL_MEM_OBJECT_IMAGE1D_ARRAY) ? slice_pitch + : row_pitch, + slice_pitch); + if (TEST_PASS != read_error) + { + return read_error; + } + + err = clReleaseMemObject(buffer); + test_error(err, "Unable to release buffer"); + + err = clReleaseMemObject(image); + test_error(err, "Unable to release image"); + } + + return TEST_PASS; +} \ No newline at end of file diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp new file mode 100644 index 00000000..9212fcbc --- /dev/null +++ b/test_conformance/images/kernel_read_write/test_cl_ext_image_requirements_info.cpp @@ -0,0 +1,482 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +#include "../testBase.h" +#include "../common.h" +#include "test_cl_ext_image_buffer.hpp" + +/** + * Negative tests for {CL_IMAGE_REQUIREMENTS_SIZE_EXT} + * Check that attempting to perform the {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query + * without specifying the _image_format_ results in {CL_INVALID_VALUE} being + * returned. Check that attempting to perform the + * {CL_IMAGE_REQUIREMENTS_SIZE_EXT} query without specifying the _image_desc_ + * results in {CL_INVALID_VALUE} being returned. + */ +int cl_image_requirements_size_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + size_t max_size = 0; + size_t param_val_size = 0; + + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, CL_MEM_OBJECT_IMAGE2D); + + cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT16 }; + + /* Check image_format null results in CL_INVALID_VALUE */ + cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc, + CL_IMAGE_REQUIREMENTS_SIZE_EXT, sizeof(max_size), &max_size, + ¶m_val_size); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check image_desc null results in CL_INVALID_VALUE */ + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, nullptr, + CL_IMAGE_REQUIREMENTS_SIZE_EXT, sizeof(max_size), &max_size, + ¶m_val_size); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + return TEST_PASS; +} + +/** + * Consistency checks for CL_IMAGE_REQUIREMENTS_SIZE_EXT + * When creating 2D images from a buffer is supported + * Check that the 
CL_IMAGE_REQUIREMENTS_SIZE_EXT query can be performed + * successfully. Create a buffer with the size returned and check that an image + * can successfully be created from the buffer. Check that the value returned + * for CL_MEM_SIZE for the image is the same as the value returned for + * CL_IMAGE_REQUIREMENTS_SIZE_EXT. + */ +int cl_image_requirements_size_ext_consistency(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + if (!is_extension_available(device, "cl_ext_image_from_buffer")) + { + printf("Extension cl_ext_image_from_buffer not available"); + return TEST_SKIPPED_ITSELF; + } + + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + size_t max_size = 0; + size_t param_val_size = 0; + + std::vector imageTypes{ + CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE2D_ARRAY + }; + + std::vector flagTypes{ CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY, + CL_MEM_READ_WRITE, + CL_MEM_KERNEL_READ_AND_WRITE }; + + for (auto flag : flagTypes) + { + for (auto imageType : imageTypes) + { + /* Get the list of supported image formats */ + std::vector formatList; + if (TEST_PASS + != get_format_list(context, imageType, formatList, flag) + || formatList.size() == 0) + { + test_fail("Failure to get supported formats list"); + } + + for (auto format : formatList) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + flag = (flag == CL_MEM_KERNEL_READ_AND_WRITE) + ? 
CL_MEM_READ_WRITE + : flag; + + cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, flag, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_SIZE_EXT, sizeof(max_size), &max_size, + ¶m_val_size); + test_error(err, "Error clGetImageRequirementsInfoEXT"); + + /* Create buffer */ + cl_mem buffer = + clCreateBuffer(context, flag, max_size, nullptr, &err); + test_error(err, "Unable to create buffer"); + + image_desc.buffer = buffer; + + /* 2D Image from buffer */ + cl_mem image_buffer = clCreateImage(context, flag, &format, + &image_desc, nullptr, &err); + test_error(err, "Unable to create image"); + + size_t size = 0; + err = clGetMemObjectInfo(image_buffer, CL_MEM_SIZE, + sizeof(size_t), &size, NULL); + test_error(err, "Error clGetMemObjectInfo"); + + if (max_size != size) + { + test_fail("CL_IMAGE_REQUIREMENTS_SIZE_EXT different from " + "CL_MEM_SIZE"); + } + + err = clReleaseMemObject(image_buffer); + test_error(err, "Error clReleaseMemObject"); + + err = clReleaseMemObject(buffer); + test_error(err, "Error clReleaseMemObject"); + } + } + } + + return TEST_PASS; +} + +/** + * Negative testing for all testable error codes returned by + * clGetImageFormatInfoKHR + */ +int clGetImageRequirementsInfoEXT_negative(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, CL_MEM_OBJECT_IMAGE3D); + + cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT16 }; + + /* Check that CL_INVALID_CONTEXT is returned when passing nullptr as context + */ + size_t row_pitch_alignment = 0; + cl_int err = clGetImageRequirementsInfoEXT( + nullptr, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + 
CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_CONTEXT, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_VALUE is returned when passing an invalid + * image_type */ + cl_image_desc invalid_desc = { CL_MEM_OBJECT_BUFFER, TEST_IMAGE_SIZE }; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &invalid_desc, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_VALUE is returned when passing invalid flags */ + err = clGetImageRequirementsInfoEXT( + context, nullptr, -1, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_IMAGE_FORMAT_DESCRIPTOR is returned when passing a + * nullptr image_format */ + cl_image_format invalid_format = { CL_INTENSITY, CL_UNORM_SHORT_555 }; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &invalid_format, &image_desc, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_IMAGE_DESCRIPTOR is returned when passing an + * image_desc with invalid values */ + cl_image_desc invalid_desc_size = { CL_MEM_OBJECT_IMAGE1D, 0 }; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &invalid_desc_size, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + 
test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_VALUE is returned when passing an invalid + * param_name */ + cl_image_requirements_info_ext invalid_info = CL_IMAGE_FORMAT; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, invalid_info, + sizeof(row_pitch_alignment), &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_VALUE is returned when passing a param_value_size + * value smaller than the size of the return type */ + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + sizeof(row_pitch_alignment) - 1, &row_pitch_alignment, nullptr); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + /* Check that CL_INVALID_VALUE is returned when passing a param_value_size + * value smaller than the size of the return type */ + uint32_t max_height = 0; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, sizeof(max_height) - 1, + &max_height, nullptr); + test_failure_error(err, CL_INVALID_VALUE, + "Unexpected clGetImageRequirementsInfoEXT return"); + + return TEST_PASS; +} + +/** + * Negative tests for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} + * Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query on all + * image types for which it is not valid Check that + * {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. + * + * Negative testing for {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} + * Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} query on all + * image types for which it is not valid Check that + * {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. 
+ * + * Negative testing for {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} + * Attempt to perform the {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} query on + * all image types for which it is not valid Check that + * {CL_INVALID_IMAGE_DESCRIPTOR} is returned in all cases. + */ +int cl_image_requirements_max_val_ext_negative(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + size_t value = 0; + + std::vector imageTypes_height{ + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D + }; + + cl_image_format format = { CL_RGBA, CL_UNSIGNED_INT16 }; + + for (auto imageType : imageTypes_height) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + /* Check image_format null results in CL_INVALID_VALUE */ + cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, sizeof(value), &value, + nullptr); + test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR, + "Unexpected clGetImageRequirementsInfoEXT return"); + } + + std::vector imageTypes_depth{ + CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE2D_ARRAY, + CL_MEM_OBJECT_IMAGE1D_ARRAY, CL_MEM_OBJECT_IMAGE1D_BUFFER, + CL_MEM_OBJECT_IMAGE1D + }; + + for (auto imageType : imageTypes_depth) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + /* Check image_format null results in CL_INVALID_VALUE */ + cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT, sizeof(value), &value, + nullptr); + test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR, + 
"Unexpected clGetImageRequirementsInfoEXT return"); + } + + std::vector imageTypes_array_size{ + CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE2D, + CL_MEM_OBJECT_IMAGE1D_BUFFER, CL_MEM_OBJECT_IMAGE1D + }; + + for (auto imageType : imageTypes_array_size) + { + cl_image_desc image_desc = { 0 }; + image_desc_init(&image_desc, imageType); + + /* Check image_format null results in CL_INVALID_VALUE */ + cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, &format, &image_desc, + CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT, sizeof(value), &value, + nullptr); + test_failure_error(err, CL_INVALID_IMAGE_DESCRIPTOR, + "Unexpected clGetImageRequirementsInfoEXT return"); + } + + return TEST_PASS; +} + +/** + * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} + ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT} query can be performed + *successfully + * + * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} + ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT} query can be performed + *successfully + * + * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} + ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT} query can be performed + *successfully + * + * Consistency checks for {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} + ** Check that the {CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT} query can be + *performed successfully + */ +int cl_image_requirements_max_val_ext_positive(cl_device_id device, + cl_context context, + cl_command_queue queue) +{ + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + + cl_platform_id platform = getPlatformFromDevice(device); + GET_EXTENSION_FUNC(platform, clGetImageRequirementsInfoEXT); + + /* CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT */ + cl_image_desc image_desc_1d = { 0 }; + image_desc_init(&image_desc_1d, CL_MEM_OBJECT_IMAGE1D); + + uint32_t max_width = 0; + 
cl_int err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc_1d, + CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT, sizeof(max_width), &max_width, + nullptr); + test_error(err, "Error clGetImageRequirementsInfoEXT"); + + size_t width_1d = 0; + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, + sizeof(width_1d), &width_1d, NULL); + test_error(err, "Error clGetDeviceInfo"); + + if (!(max_width <= width_1d && max_width > 0)) + { + test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT value"); + } + + /* CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT */ + cl_image_desc image_desc_2d = { 0 }; + image_desc_init(&image_desc_2d, CL_MEM_OBJECT_IMAGE2D); + + uint32_t max_height = 0; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc_2d, + CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, sizeof(max_height), &max_height, + nullptr); + test_error(err, "Error clGetImageRequirementsInfoEXT"); + + size_t height_2d = 0; + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, + sizeof(height_2d), &height_2d, NULL); + test_error(err, "Error clGetDeviceInfo"); + + if (!(max_height <= height_2d && max_height > 0)) + { + test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT value"); + } + + /* CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT */ + cl_image_desc image_desc_3d = { 0 }; + image_desc_init(&image_desc_3d, CL_MEM_OBJECT_IMAGE3D); + + uint32_t max_depth = 0; + err = clGetImageRequirementsInfoEXT(context, nullptr, CL_MEM_READ_WRITE, + nullptr, &image_desc_3d, + CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT, + sizeof(max_depth), &max_depth, nullptr); + test_error(err, "Error clGetImageRequirementsInfoEXT"); + + size_t depth_3d = 0; + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth_3d), + &depth_3d, NULL); + test_error(err, "Error clGetDeviceInfo"); + + if (!(max_depth <= depth_3d && max_depth > 0)) + { + test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT value"); + } + + /* 
CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT */ + cl_image_desc image_desc_array = { 0 }; + image_desc_init(&image_desc_array, CL_MEM_OBJECT_IMAGE2D_ARRAY); + + uint32_t max_array_size = 0; + err = clGetImageRequirementsInfoEXT( + context, nullptr, CL_MEM_READ_WRITE, nullptr, &image_desc_array, + CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT, sizeof(max_array_size), + &max_array_size, nullptr); + test_error(err, "Error clGetImageRequirementsInfoEXT"); + + size_t array_size = 0; + err = clGetDeviceInfo(device, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, + sizeof(array_size), &array_size, NULL); + test_error(err, "Error clGetDeviceInfo"); + + if (!(max_array_size <= array_size && max_array_size > 0)) + { + test_fail("Unexpected CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT value"); + } + + return TEST_PASS; +} \ No newline at end of file -- cgit v1.2.3 From 7859a33182a96462067448b4a350823543f8dd39 Mon Sep 17 00:00:00 2001 From: Stuart Brady Date: Tue, 13 Sep 2022 18:22:52 +0100 Subject: Include release builds in GitHub Actions (#1486) The "Ninja" CMake generator does not support multiple configurations, i.e. it does not support use of the '--config' option when running 'cmake --build'. As such, the default configuration (i.e. Debug) was getting used for all builds. Use the CMAKE_BUILD_TYPE variable instead, so that we do release builds, but change one build (ubuntu-20.04 aarch64) to use Debug as its build type, to keep some build coverage for asserts, etc. For Vulkan-Loader and OpenCL-ICD-Loader, we do release builds unconditionally, as we assume there is no need in the CI workflow to actually run the binaries that are built, and therefore no need for any additional debug info. 
Signed-off-by: Stuart Brady --- .github/workflows/presubmit.yml | 6 +++++- presubmit.sh | 32 +++++++++++++++++++++++++------- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index bac4ceba..1dfdb963 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -3,11 +3,12 @@ on: [push, pull_request] jobs: build: - name: Build ${{ matrix.os }} ${{ matrix.arch }} + name: Build ${{ matrix.os }} ${{ matrix.arch }}${{ matrix.extra }} runs-on: ${{ matrix.os }} env: JOB_ARCHITECTURE: ${{ matrix.arch }} JOB_ENABLE_GL: ${{ matrix.gl }} + JOB_ENABLE_DEBUG: ${{ matrix.debug }} strategy: fail-fast: false matrix: @@ -17,12 +18,15 @@ jobs: - os: ubuntu-20.04 mainmatrix: true gl: 1 + extra: " gl" - os: ubuntu-20.04 mainmatrix: false arch: arm - os: ubuntu-20.04 mainmatrix: false arch: aarch64 + debug: 1 + extra: " debug" steps: - uses: actions/checkout@v2 - name: Setup Ninja diff --git a/presubmit.sh b/presubmit.sh index 6c3a293e..ca39b9a2 100755 --- a/presubmit.sh +++ b/presubmit.sh @@ -14,6 +14,9 @@ TOOLCHAIN_FILE=${TOP}/toolchain.cmake touch ${TOOLCHAIN_FILE} BUILD_OPENGL_TEST="OFF" +cmake --version +echo + # Prepare toolchain if needed if [[ ${JOB_ARCHITECTURE} != "" && ${RUNNER_OS} != "Windows" ]]; then TOOLCHAIN_URL_VAR=TOOLCHAIN_URL_${JOB_ARCHITECTURE} @@ -40,6 +43,12 @@ if [[ ( ${JOB_ARCHITECTURE} == "" && ${JOB_ENABLE_GL} == "1" ) ]]; then BUILD_OPENGL_TEST="ON" fi +if [[ ${JOB_ENABLE_DEBUG} == 1 ]]; then + BUILD_CONFIG="Debug" +else + BUILD_CONFIG="Release" +fi + #Vulkan Headers git clone https://github.com/KhronosGroup/Vulkan-Headers.git @@ -48,8 +57,11 @@ git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader.git cd ${TOP}/OpenCL-ICD-Loader mkdir build cd build -cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ -cmake --build . -j2 --config Release +cmake .. 
-G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ + -DOPENCL_ICD_LOADER_HEADERS_DIR=${TOP}/OpenCL-Headers/ +cmake --build . -j2 #Vulkan Loader cd ${TOP} @@ -58,8 +70,15 @@ cd Vulkan-Loader mkdir build cd build python3 ../scripts/update_deps.py -cmake .. -G Ninja -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} -DBUILD_WSI_XLIB_SUPPORT=OFF -DBUILD_WSI_XCB_SUPPORT=OFF -DBUILD_WSI_WAYLAND_SUPPORT=OFF -DUSE_GAS=OFF -C helper.cmake .. -cmake --build . -j2 --config Release +cmake .. -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ + -DBUILD_WSI_XLIB_SUPPORT=OFF \ + -DBUILD_WSI_XCB_SUPPORT=OFF \ + -DBUILD_WSI_WAYLAND_SUPPORT=OFF \ + -DUSE_GAS=OFF \ + -C helper.cmake .. +cmake --build . -j2 # Build CTS cd ${TOP} @@ -74,6 +93,7 @@ else CMAKE_CACHE_OPTIONS="-DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache" fi cmake .. -G Ninja \ + -DCMAKE_BUILD_TYPE="${BUILD_CONFIG}" \ ${CMAKE_CACHE_OPTIONS} \ -DCL_INCLUDE_DIR=${TOP}/OpenCL-Headers \ -DCL_LIB_DIR=${TOP}/OpenCL-ICD-Loader/build \ @@ -84,6 +104,4 @@ cmake .. -G Ninja \ -DGL_IS_SUPPORTED=${BUILD_OPENGL_TEST} \ -DVULKAN_INCLUDE_DIR=${TOP}/Vulkan-Headers/include/ \ -DVULKAN_LIB_DIR=${TOP}/Vulkan-Loader/build/loader/ -cmake --build . -j3 --config Release - - +cmake --build . -j3 -- cgit v1.2.3 From a87e686757f9fda5377baf73a32bb3c791eae70c Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 16 Sep 2022 13:34:36 +0100 Subject: Fix more warnings in math_brute_force (#1502) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix "‘nadj’ may be used uninitialized in this function [-Werror=maybe-uninitialized]". * Fix "specified bound 4096 equals destination size [-Werror=stringop-truncation]". 
Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/main.cpp | 8 +++++--- test_conformance/math_brute_force/reference_math.cpp | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 8cebff9d..ee3fcbd9 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -360,16 +360,18 @@ static int ParseArgs(int argc, const char **argv) int singleThreaded = 0; { // Extract the app name - strncpy(appName, argv[0], MAXPATHLEN); + strncpy(appName, argv[0], MAXPATHLEN - 1); + appName[MAXPATHLEN - 1] = '\0'; #if defined(__APPLE__) char baseName[MAXPATHLEN]; char *base = NULL; - strncpy(baseName, argv[0], MAXPATHLEN); + strncpy(baseName, argv[0], MAXPATHLEN - 1); + baseName[MAXPATHLEN - 1] = '\0'; base = basename(baseName); if (NULL != base) { - strncpy(appName, base, sizeof(appName)); + strncpy(appName, base, sizeof(appName) - 1); appName[sizeof(appName) - 1] = '\0'; } #endif diff --git a/test_conformance/math_brute_force/reference_math.cpp b/test_conformance/math_brute_force/reference_math.cpp index f91ecb22..afa072f8 100644 --- a/test_conformance/math_brute_force/reference_math.cpp +++ b/test_conformance/math_brute_force/reference_math.cpp @@ -1949,7 +1949,8 @@ double reference_lgamma(double x) w6 = -1.63092934096575273989e-03; /* 0xBF5AB89D, 0x0B9E43E4 */ static const double zero = 0.00000000000000000000e+00; - double t, y, z, nadj, p, p1, p2, p3, q, r, w; + double nadj = zero; + double t, y, z, p, p1, p2, p3, q, r, w; cl_int i, hx, lx, ix; union { -- cgit v1.2.3 From 8f9c1960ff5a48d85662c568ffd43c74459fcf4c Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 20 Sep 2022 16:52:22 +0100 Subject: Improve MTdataHolder design and use it in math_brute_force (#1490) Improve the design of the MTdataHolder wrapper: * Make it a class instead of a struct with a private member, to make it clearer that 
there is no direct access to the MTdata member. * Make the 1-arg constructor `explicit` to avoid unintended conversions. * Forbid copy construction/assignment as MTdataHolder is never initialised from an MTdataHolder object in the codebase. * Define move construction/assignment as per the "rule of five". Use the MTdataHolder class throughout math_brute_force, to simplify code by avoiding manual resource management. Original patch by Marco Antognini. Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt --- test_common/harness/mt19937.h | 33 +++++++++++++++++----- .../math_brute_force/binary_double.cpp | 11 ++------ test_conformance/math_brute_force/binary_float.cpp | 11 ++------ .../math_brute_force/binary_i_double.cpp | 11 ++------ .../math_brute_force/binary_i_float.cpp | 11 ++------ .../math_brute_force/binary_operator_double.cpp | 11 ++------ .../math_brute_force/binary_operator_float.cpp | 11 ++------ .../math_brute_force/macro_binary_double.cpp | 11 ++------ .../math_brute_force/macro_binary_float.cpp | 11 ++------ .../math_brute_force/macro_unary_double.cpp | 2 +- .../math_brute_force/macro_unary_float.cpp | 2 +- test_conformance/math_brute_force/main.cpp | 6 ++-- test_conformance/math_brute_force/unary_double.cpp | 2 +- test_conformance/math_brute_force/unary_float.cpp | 2 +- 14 files changed, 56 insertions(+), 79 deletions(-) diff --git a/test_common/harness/mt19937.h b/test_common/harness/mt19937.h index 98eec843..447ca25a 100644 --- a/test_common/harness/mt19937.h +++ b/test_common/harness/mt19937.h @@ -94,23 +94,42 @@ double genrand_res53(MTdata /*data*/); bool genrand_bool(MTdata /*data*/); #include +#include -struct MTdataHolder -{ - MTdataHolder(cl_uint seed) +class MTdataHolder { +public: + MTdataHolder() = default; + explicit MTdataHolder(cl_uint seed) { m_mtdata = init_genrand(seed); assert(m_mtdata != nullptr); } - MTdataHolder(MTdata mtdata): m_mtdata(mtdata) {} + // 
Forbid copy. + MTdataHolder(const MTdataHolder&) = delete; + MTdataHolder& operator=(const MTdataHolder&) = delete; - ~MTdataHolder() { free_mtdata(m_mtdata); } + // Support move semantics. + MTdataHolder(MTdataHolder&& h) { std::swap(m_mtdata, h.m_mtdata); } + MTdataHolder& operator=(MTdataHolder&& h) + { + std::swap(m_mtdata, h.m_mtdata); + return *this; + } - operator MTdata() const { return m_mtdata; } + ~MTdataHolder() + { + if (m_mtdata) free_mtdata(m_mtdata); + } + + operator MTdata() const + { + assert(m_mtdata && "Object wasn't initialised"); + return m_mtdata; + } private: - MTdata m_mtdata; + MTdata m_mtdata = nullptr; }; #endif // #ifdef __cplusplus diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index 1b1f7d4c..b6bb049b 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -134,7 +134,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. double maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -691,7 +691,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -740,7 +740,7 @@ int TestFunc_Double_Double_Double(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -792,10 +792,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index d229a376..e85add4b 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -132,7 +132,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. double maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -848,7 +848,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -897,7 +897,7 @@ int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -949,10 +949,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/binary_i_double.cpp b/test_conformance/math_brute_force/binary_i_double.cpp index 7baa21a2..f8786e68 100644 --- a/test_conformance/math_brute_force/binary_i_double.cpp +++ b/test_conformance/math_brute_force/binary_i_double.cpp @@ -133,7 +133,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. cl_int maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -610,7 +610,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -662,7 +662,7 @@ int TestFunc_Double_Double_Int(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -714,10 +714,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 3f998e2e..2387ff06 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -131,7 +131,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. cl_int maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -603,7 +603,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -655,7 +655,7 @@ int TestFunc_Float_Float_Int(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -707,10 +707,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 74883664..34ec6197 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -133,7 +133,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. double maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -658,7 +658,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -707,7 +707,7 @@ int TestFunc_Double_Double_Double_Operator(const Func *f, MTdata d, goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -759,10 +759,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 56f293c1..5577cffe 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -131,7 +131,7 @@ struct ThreadInfo maxErrorValue; // position of the max error value (param 1). Init to 0. double maxErrorValue2; // position of the max error value (param 2). Init // to 0. 
- MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -785,7 +785,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -834,7 +834,7 @@ int TestFunc_Float_Float_Float_Operator(const Func *f, MTdata d, goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -886,10 +886,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index a697a7be..b81766bd 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -127,7 +127,7 @@ struct ThreadInfo clMemWrapper inBuf2; Buffers outBuf; - MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -616,7 +616,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -665,7 +665,7 @@ int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -704,10 +704,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return 
error; } diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 97e2f675..4a3fb67d 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -125,7 +125,7 @@ struct ThreadInfo clMemWrapper inBuf2; Buffers outBuf; - MTdata d; + MTdataHolder d; // Per thread command queue to improve performance clCommandQueueWrapper tQueue; @@ -605,7 +605,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { @@ -654,7 +654,7 @@ int TestMacro_Int_Float_Float(const Func *f, MTdata d, bool relaxedMode) goto exit; } - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); } // Init the kernels @@ -693,10 +693,5 @@ exit: } } - for (auto &threadInfo : test_info.tinfo) - { - free_mtdata(threadInfo.d); - } - return error; } diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 5a3ad355..19cefee4 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -400,7 +400,7 @@ int TestMacro_Int_Double(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { diff --git a/test_conformance/math_brute_force/macro_unary_float.cpp b/test_conformance/math_brute_force/macro_unary_float.cpp index d2982156..6a1b9b9a 100644 --- 
a/test_conformance/math_brute_force/macro_unary_float.cpp +++ b/test_conformance/math_brute_force/macro_unary_float.cpp @@ -414,7 +414,7 @@ int TestMacro_Int_Float(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index ee3fcbd9..2c81de87 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -98,7 +98,7 @@ cl_mem gInBuffer2 = NULL; cl_mem gInBuffer3 = NULL; cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL }; cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL }; -static MTdata gMTdata; +static MTdataHolder gMTdata; cl_device_fp_config gFloatCapabilities = 0; int gWimpyReductionFactor = 32; int gVerboseBruteForce = 0; @@ -326,7 +326,7 @@ int main(int argc, const char *argv[]) vlog("\n-------------------------------------------------------------------" "----------------------------------------\n"); - gMTdata = init_genrand(gRandomSeed); + gMTdata = MTdataHolder(gRandomSeed); FPU_mode_type oldMode; DisableFTZ(&oldMode); @@ -336,8 +336,6 @@ int main(int argc, const char *argv[]) RestoreFPState(&oldMode); - free_mtdata(gMTdata); - if (gQueue) { int error_code = clFinish(gQueue); diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 7dfc12b1..3deac57c 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -427,7 +427,7 @@ int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, 
ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { diff --git a/test_conformance/math_brute_force/unary_float.cpp b/test_conformance/math_brute_force/unary_float.cpp index 6a5c3539..4c1f1a1d 100644 --- a/test_conformance/math_brute_force/unary_float.cpp +++ b/test_conformance/math_brute_force/unary_float.cpp @@ -580,7 +580,7 @@ int TestFunc_Float_Float(const Func *f, MTdata d, bool relaxedMode) test_info.k[i].resize(test_info.threadCount, nullptr); } - test_info.tinfo.resize(test_info.threadCount, ThreadInfo{}); + test_info.tinfo.resize(test_info.threadCount); for (cl_uint i = 0; i < test_info.threadCount; i++) { cl_buffer_region region = { -- cgit v1.2.3 From 76bd9d36744b89e791423ba16f9db323816888e4 Mon Sep 17 00:00:00 2001 From: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Date: Tue, 20 Sep 2022 21:23:34 +0530 Subject: Fix memory oob problem in test half (#1489) Allocate memory for argc arguments instead of argc - 1. --- test_conformance/half/main.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp index 6600cc58..104f4616 100644 --- a/test_conformance/half/main.cpp +++ b/test_conformance/half/main.cpp @@ -131,8 +131,7 @@ exit: static int ParseArgs( int argc, const char **argv ) { int i; - argList = (const char **)calloc( argc - 1, sizeof( char*) ); - + argList = (const char **)calloc(argc, sizeof(char *)); if( NULL == argList ) { vlog_error( "Failed to allocate memory for argList.\n" ); -- cgit v1.2.3 From 75edf2a8811da5ac379c9dc994d371f31bb74b6e Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 20 Sep 2022 16:55:51 +0100 Subject: [NFC] Enable -Wall for math_brute_force (#1477) math_brute_force compiles cleanly with `-Wall` currently, so avoid regressing from that state. 
Ideally we would enable `-Wall` in the top-level CMakeLists.txt, but other tests do not compile cleanly with `-Wall` yet. Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 28d2716f..23ee6849 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -40,4 +40,14 @@ set(${MODULE_NAME}_SOURCES utility.h ) +# math_brute_force compiles cleanly with -Wall but other tests not (yet), so +# enable -Wall locally. +if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") + SET_SOURCE_FILES_PROPERTIES( + ${${MODULE_NAME}_SOURCES} + PROPERTIES + COMPILE_FLAGS "-Wall -Wno-format -Wno-strict-aliasing -Wno-unknown-pragmas" + ) +endif() + include(../CMakeCommon.txt) -- cgit v1.2.3 From 86d5ee54140a9b0bf2bef5770e00748438bf05fe Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 20 Sep 2022 21:37:14 +0530 Subject: Update extension list of test_compiler (#1507) * Update extension list of test_compiler Update extension list of test_compiler with missing external memory and semaphore extensions --- test_conformance/compiler/test_compiler_defines_for_extensions.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp index 94657d61..b95b0f53 100644 --- a/test_conformance/compiler/test_compiler_defines_for_extensions.cpp +++ b/test_conformance/compiler/test_compiler_defines_for_extensions.cpp @@ -79,11 +79,13 @@ const char *known_extensions[] = { "cl_khr_spirv_linkonce_odr", "cl_khr_semaphore", "cl_khr_external_semaphore", - "cl_khr_external_semaphore_opaque_fd", + "cl_khr_external_semaphore_win32",
"cl_khr_external_semaphore_sync_fd", - "cl_khr_command_buffer", + "cl_khr_external_semaphore_opaque_fd", "cl_khr_external_memory", + "cl_khr_external_memory_win32", "cl_khr_external_memory_opaque_fd", + "cl_khr_command_buffer", "cl_khr_command_buffer_mutable_dispatch", }; -- cgit v1.2.3 From 92285f7c9de965ddc41e7dfaaab8c7c75aa55dbe Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 22 Sep 2022 21:17:55 +0100 Subject: cmake: Add set_gnulike_module_compile_flags (#1510) Factor out a macro to set module-specific compilation flags for GNU-like compilers. This simplifies setting compilation flags per test. Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- CMakeLists.txt | 11 +++++++++++ test_conformance/images/kernel_read_write/CMakeLists.txt | 8 +------- test_conformance/math_brute_force/CMakeLists.txt | 12 +++--------- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b7c86ba1..6a25d5b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -113,6 +113,17 @@ else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /D__SSE__") endif() +# Set a module's COMPILE_FLAGS if using gcc or clang. +macro(set_gnulike_module_compile_flags flags) + if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") + SET_SOURCE_FILES_PROPERTIES( + ${${MODULE_NAME}_SOURCES} + PROPERTIES + COMPILE_FLAGS ${flags} + ) + endif() +endmacro(set_gnulike_module_compile_flags) + if(MSVC) # Don't warn when using standard non-secure functions. 
add_compile_definitions(_CRT_SECURE_NO_WARNINGS) diff --git a/test_conformance/images/kernel_read_write/CMakeLists.txt b/test_conformance/images/kernel_read_write/CMakeLists.txt index 6eb5dc7f..ccd678c1 100644 --- a/test_conformance/images/kernel_read_write/CMakeLists.txt +++ b/test_conformance/images/kernel_read_write/CMakeLists.txt @@ -21,13 +21,7 @@ set(${MODULE_NAME}_SOURCES # Make unused variables not fatal in this module; see # https://github.com/KhronosGroup/OpenCL-CTS/issues/1484 -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - SET_SOURCE_FILES_PROPERTIES( - ${${MODULE_NAME}_SOURCES} - PROPERTIES - COMPILE_FLAGS "-Wno-error=unused-variable" - ) -endif() +set_gnulike_module_compile_flags("-Wno-error=unused-variable") include(../../CMakeCommon.txt) diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 23ee6849..1db1ecdf 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -40,14 +40,8 @@ set(${MODULE_NAME}_SOURCES utility.h ) -# math_brute_force compiles cleanly with -Wall but other tests not (yet), so -# enable -Wall locally. -if(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_CXX_COMPILER_ID}" MATCHES "(Apple)?Clang") - SET_SOURCE_FILES_PROPERTIES( - ${${MODULE_NAME}_SOURCES} - PROPERTIES - COMPILE_FLAGS "-Wall -Wno-format -Wno-strict-aliasing -Wno-unknown-pragmas" - ) -endif() +# math_brute_force compiles cleanly with -Wall (except for a few remaining +# warnings), but other tests not (yet); so enable -Wall locally. +set_gnulike_module_compile_flags("-Wall -Wno-format -Wno-strict-aliasing -Wno-unknown-pragmas") include(../CMakeCommon.txt) -- cgit v1.2.3 From 180adef84c535588c1743673f3468c28cf564a09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Kosi=C5=84ski?= Date: Fri, 23 Sep 2022 09:29:18 -0700 Subject: Remove __DATE__ and __TIME__ usage (#1506) These macros make the build non-deterministic. 
--- test_conformance/contractions/contractions.cpp | 3 +-- test_conformance/conversions/test_conversions.cpp | 4 +--- test_conformance/half/main.cpp | 4 +--- test_conformance/math_brute_force/main.cpp | 2 -- test_conformance/printf/test_printf.cpp | 4 +--- test_conformance/select/test_select.cpp | 4 +--- 6 files changed, 5 insertions(+), 16 deletions(-) diff --git a/test_conformance/contractions/contractions.cpp b/test_conformance/contractions/contractions.cpp index dddebb40..474fd364 100644 --- a/test_conformance/contractions/contractions.cpp +++ b/test_conformance/contractions/contractions.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -434,7 +434,6 @@ static int ParseArgs( int argc, const char **argv ) gArgCount++; } } - vlog( "\n\nTest binary built %s %s\n", __DATE__, __TIME__ ); PrintArch(); diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 788af99b..765d09ff 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -484,8 +484,6 @@ static int ParseArgs( int argc, const char **argv ) vlog( "\n" ); - vlog( "Test binary built %s %s\n", __DATE__, __TIME__ ); - PrintArch(); if( gWimpyMode ) diff --git a/test_conformance/half/main.cpp b/test_conformance/half/main.cpp index 104f4616..6bc7db95 100644 --- a/test_conformance/half/main.cpp +++ b/test_conformance/half/main.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. 
-// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -221,7 +221,6 @@ static int ParseArgs( int argc, const char **argv ) gWimpyMode = 1; } - vlog( "Test binary built %s %s\n", __DATE__, __TIME__ ); PrintArch(); if( gWimpyMode ) { @@ -247,4 +246,3 @@ static void PrintUsage( void ) vlog("\t\t%s\n", test_list[i].name ); } } - diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 2c81de87..59960a85 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -502,8 +502,6 @@ static int ParseArgs(int argc, const char **argv) gWimpyMode = 1; } - vlog("\nTest binary built %s %s\n", __DATE__, __TIME__); - PrintArch(); if (gWimpyMode) diff --git a/test_conformance/printf/test_printf.cpp b/test_conformance/printf/test_printf.cpp index a32ee4ea..d638cd46 100644 --- a/test_conformance/printf/test_printf.cpp +++ b/test_conformance/printf/test_printf.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -1030,8 +1030,6 @@ test_status InitCL( cl_device_id device ) return TEST_SKIP; } - log_info( "Test binary built %s %s\n", __DATE__, __TIME__ ); - gFd = acquireOutputStream(&err); if (err != 0) { diff --git a/test_conformance/select/test_select.cpp b/test_conformance/select/test_select.cpp index 27ee5ffd..972a53c6 100644 --- a/test_conformance/select/test_select.cpp +++ b/test_conformance/select/test_select.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -639,7 +639,6 @@ int main(int argc, const char* argv[]) s_wimpy_mode = true; } - log_info( "Test binary built %s %s\n", __DATE__, __TIME__ ); if (s_wimpy_mode) { log_info("\n"); log_info("*** WARNING: Testing in Wimpy mode! ***\n"); @@ -668,4 +667,3 @@ static void printUsage( void ) log_info( "\t%s\n", test_list[i].name ); } } - -- cgit v1.2.3 From 2012c6cadd4707d40a83da5fecd080de908d5973 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 23 Sep 2022 18:08:10 +0100 Subject: [NFC] Fix typo in clang-format directive (#1512) Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/pipes/test_pipe_limits.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/pipes/test_pipe_limits.cpp b/test_conformance/pipes/test_pipe_limits.cpp index 7e979251..e1048f5f 100644 --- a/test_conformance/pipes/test_pipe_limits.cpp +++ b/test_conformance/pipes/test_pipe_limits.cpp @@ -69,7 +69,7 @@ void createKernelSourceCode(std::stringstream &stream, int num_pipes) } } )"; - // clang-format om + // clang-format on } stream << R"( } -- cgit v1.2.3 From c014122742c211f8febb49324b9e99302e26018c Mon Sep 17 00:00:00 2001 From: ellnor01 <51320439+ellnor01@users.noreply.github.com> Date: Mon, 26 Sep 2022 12:57:42 +0100 Subject: Creating common functions for image/kernel_read_write read tests (#1141) * Make InitFloatCoords suitable for all image types Contributes #616 * Create common functions neutral for image types Remove 3D specific code from common test_read_image so using it for other image types is simpler in following patches Contributes #616 * Removing unused code Tidying commented out or unnecessary code Contributes #616 Signed-off-by: Ellen Norris-Thompson * Restoring 'lod' variable name Contributes #616 * Default cases to handle unsupported image types Contributes #616 * Resolving build issues Contributes #616 * Fix formatting Contributes #616 * Using TEST_FAIL as an 
error code. Contributes #616 * Add static keyword, improve error handling Contributes #616 * Fix build errors with least disruption Contributes #616 Signed-off-by: Ellen Norris-Thompson --- .../images/kernel_read_write/test_common.cpp | 443 +++++++++++++-------- .../images/kernel_read_write/test_common.h | 139 +++---- 2 files changed, 331 insertions(+), 251 deletions(-) diff --git a/test_conformance/images/kernel_read_write/test_common.cpp b/test_conformance/images/kernel_read_write/test_common.cpp index 62bd4ab1..a22db195 100644 --- a/test_conformance/images/kernel_read_write/test_common.cpp +++ b/test_conformance/images/kernel_read_write/test_common.cpp @@ -34,122 +34,210 @@ cl_sampler create_sampler(cl_context context, image_sampler_data *sdata, bool te return sampler; } -void InitFloatCoordsCommon(image_descriptor *imageInfo, - image_sampler_data *imageSampler, float *xOffsets, - float *yOffsets, float *zOffsets, float xfract, - float yfract, float zfract, int normalized_coords, - MTdata d, int lod) +bool get_image_dimensions(image_descriptor *imageInfo, size_t &width, + size_t &height, size_t &depth) +{ + width = imageInfo->width; + height = 1; + depth = 1; + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE1D: break; + case CL_MEM_OBJECT_IMAGE1D_ARRAY: height = imageInfo->arraySize; break; + case CL_MEM_OBJECT_IMAGE2D: height = imageInfo->height; break; + case CL_MEM_OBJECT_IMAGE2D_ARRAY: + height = imageInfo->height; + depth = imageInfo->arraySize; + break; + case CL_MEM_OBJECT_IMAGE3D: + height = imageInfo->height; + depth = imageInfo->depth; + break; + default: + log_error("ERROR: Test does not support image type"); + return TEST_FAIL; + } + return 0; +} + +static bool InitFloatCoordsCommon(image_descriptor *imageInfo, + image_sampler_data *imageSampler, + float *xOffsets, float *yOffsets, + float *zOffsets, float xfract, float yfract, + float zfract, int normalized_coords, MTdata d, + int lod) { size_t i = 0; - if (gDisableOffsets) + size_t 
width_loop, height_loop, depth_loop; + bool error = + get_image_dimensions(imageInfo, width_loop, height_loop, depth_loop); + if (!error) { - for (size_t z = 0; z < imageInfo->depth; z++) + if (gDisableOffsets) { - for (size_t y = 0; y < imageInfo->height; y++) + for (size_t z = 0; z < depth_loop; z++) { - for (size_t x = 0; x < imageInfo->width; x++, i++) + for (size_t y = 0; y < height_loop; y++) { - xOffsets[i] = (float)(xfract + (double)x); - yOffsets[i] = (float)(yfract + (double)y); - zOffsets[i] = (float)(zfract + (double)z); + for (size_t x = 0; x < width_loop; x++, i++) + { + xOffsets[i] = (float)(xfract + (double)x); + yOffsets[i] = (float)(yfract + (double)y); + zOffsets[i] = (float)(zfract + (double)z); + } } } } - } - else - { - for (size_t z = 0; z < imageInfo->depth; z++) + else { - for (size_t y = 0; y < imageInfo->height; y++) + for (size_t z = 0; z < depth_loop; z++) { - for (size_t x = 0; x < imageInfo->width; x++, i++) + for (size_t y = 0; y < height_loop; y++) { - xOffsets[i] = - (float)(xfract - + (double)((int)x - + random_in_range(-10, 10, d))); - yOffsets[i] = - (float)(yfract - + (double)((int)y - + random_in_range(-10, 10, d))); - zOffsets[i] = - (float)(zfract - + (double)((int)z - + random_in_range(-10, 10, d))); + for (size_t x = 0; x < width_loop; x++, i++) + { + xOffsets[i] = + (float)(xfract + + (double)((int)x + + random_in_range(-10, 10, d))); + yOffsets[i] = + (float)(yfract + + (double)((int)y + + random_in_range(-10, 10, d))); + zOffsets[i] = + (float)(zfract + + (double)((int)z + + random_in_range(-10, 10, d))); + } } } } - } - if (imageSampler->addressing_mode == CL_ADDRESS_NONE) - { - i = 0; - for (size_t z = 0; z < imageInfo->depth; z++) + if (imageSampler->addressing_mode == CL_ADDRESS_NONE) { - for (size_t y = 0; y < imageInfo->height; y++) + i = 0; + for (size_t z = 0; z < depth_loop; z++) { - for (size_t x = 0; x < imageInfo->width; x++, i++) + for (size_t y = 0; y < height_loop; y++) { - xOffsets[i] = 
(float)CLAMP((double)xOffsets[i], 0.0, - (double)imageInfo->width - 1.0); - yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0, - (double)imageInfo->height - 1.0); - zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0, - (double)imageInfo->depth - 1.0); + for (size_t x = 0; x < width_loop; x++, i++) + { + xOffsets[i] = (float)CLAMP((double)xOffsets[i], 0.0, + (double)width_loop - 1.0); + yOffsets[i] = (float)CLAMP((double)yOffsets[i], 0.0, + (double)height_loop - 1.0); + zOffsets[i] = (float)CLAMP((double)zOffsets[i], 0.0, + (double)depth_loop - 1.0); + } } } } - } - if (normalized_coords || gTestMipmaps) - { - i = 0; - if (lod == 0) + if (normalized_coords || gTestMipmaps) { - for (size_t z = 0; z < imageInfo->depth; z++) + i = 0; + if (lod == 0) { - for (size_t y = 0; y < imageInfo->height; y++) + for (size_t z = 0; z < depth_loop; z++) { - for (size_t x = 0; x < imageInfo->width; x++, i++) + for (size_t y = 0; y < height_loop; y++) { - xOffsets[i] = (float)((double)xOffsets[i] - / (double)imageInfo->width); - yOffsets[i] = (float)((double)yOffsets[i] - / (double)imageInfo->height); - zOffsets[i] = (float)((double)zOffsets[i] - / (double)imageInfo->depth); + for (size_t x = 0; x < width_loop; x++, i++) + { + xOffsets[i] = (float)((double)xOffsets[i] + / (double)width_loop); + if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + yOffsets[i] = (float)((double)yOffsets[i] + / (double)height_loop); + } + if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + zOffsets[i] = (float)((double)zOffsets[i] + / (double)depth_loop); + } + } } } } - } - else if (gTestMipmaps) - { - size_t width_lod, height_lod, depth_lod; - - width_lod = - (imageInfo->width >> lod) ? (imageInfo->width >> lod) : 1; - height_lod = - (imageInfo->height >> lod) ? (imageInfo->height >> lod) : 1; - depth_lod = - (imageInfo->depth >> lod) ? 
(imageInfo->depth >> lod) : 1; - - for (size_t z = 0; z < depth_lod; z++) + else if (gTestMipmaps) { - for (size_t y = 0; y < height_lod; y++) + size_t width_lod = + (width_loop >> lod) ? (width_loop >> lod) : 1; + size_t height_lod = height_loop; + size_t depth_lod = depth_loop; + if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) { - for (size_t x = 0; x < width_lod; x++, i++) + height_lod = + (height_loop >> lod) ? (height_loop >> lod) : 1; + } + if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + depth_lod = (depth_loop >> lod) ? (depth_loop >> lod) : 1; + } + + for (size_t z = 0; z < depth_lod; z++) + { + for (size_t y = 0; y < height_lod; y++) { - xOffsets[i] = - (float)((double)xOffsets[i] / (double)width_lod); - yOffsets[i] = - (float)((double)yOffsets[i] / (double)height_lod); - zOffsets[i] = - (float)((double)zOffsets[i] / (double)depth_lod); + for (size_t x = 0; x < width_lod; x++, i++) + { + xOffsets[i] = (float)((double)xOffsets[i] + / (double)width_lod); + if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + yOffsets[i] = (float)((double)yOffsets[i] + / (double)height_lod); + } + if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + zOffsets[i] = (float)((double)zOffsets[i] + / (double)depth_lod); + } + } } } } } } + return error; +} + +cl_mem create_image_of_type(cl_context context, cl_mem_flags mem_flags, + image_descriptor *imageInfo, size_t row_pitch, + size_t slice_pitch, void *host_ptr, cl_int *error) +{ + cl_mem image; + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE3D: + image = create_image_3d(context, mem_flags, imageInfo->format, + imageInfo->width, imageInfo->height, + imageInfo->depth, row_pitch, slice_pitch, + host_ptr, error); + break; + default: + log_error("Implementation is incomplete, only 3D images are " + "supported so far"); + return nullptr; + } + return image; +} + +static size_t get_image_num_pixels(image_descriptor *imageInfo, size_t width, + size_t height, size_t depth, + size_t array_size) +{ + 
size_t image_size; + switch (imageInfo->type) + { + case CL_MEM_OBJECT_IMAGE3D: image_size = width * height * depth; break; + default: + log_error("Implementation is incomplete, only 3D images are " + "supported so far"); + return 0; + } + return image_size; } int test_read_image(cl_context context, cl_command_queue queue, @@ -161,6 +249,17 @@ int test_read_image(cl_context context, cl_command_queue queue, size_t threads[3]; static int initHalf = 0; + size_t image_size = + get_image_num_pixels(imageInfo, imageInfo->width, imageInfo->height, + imageInfo->depth, imageInfo->arraySize); + test_assert_error(0 != image_size, "Invalid image size"); + size_t width_size, height_size, depth_size; + if (get_image_dimensions(imageInfo, width_size, height_size, depth_size)) + { + log_error("ERROR: invalid image dimensions"); + return CL_INVALID_VALUE; + } + cl_mem_flags image_read_write_flags = CL_MEM_READ_ONLY; clMemWrapper xOffsets, yOffsets, zOffsets, results; @@ -169,14 +268,11 @@ int test_read_image(cl_context context, cl_command_queue queue, // Create offset data BufferOwningPtr xOffsetValues( - malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height - * imageInfo->depth)); + malloc(sizeof(cl_float) * image_size)); BufferOwningPtr yOffsetValues( - malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height - * imageInfo->depth)); + malloc(sizeof(cl_float) * image_size)); BufferOwningPtr zOffsetValues( - malloc(sizeof(cl_float) * imageInfo->width * imageInfo->height - * imageInfo->depth)); + malloc(sizeof(cl_float) * image_size)); if (imageInfo->format->image_channel_data_type == CL_HALF_FLOAT) if (DetectFloatToHalfRoundingMode(queue)) return 1; @@ -207,26 +303,27 @@ int test_read_image(cl_context context, cl_command_queue queue, { generate_random_image_data(imageInfo, maxImageUseHostPtrBackingStore, d); - unprotImage = create_image_3d( + unprotImage = create_image_of_type( context, image_read_write_flags | CL_MEM_USE_HOST_PTR, - imageInfo->format, 
imageInfo->width, imageInfo->height, - imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), + imageInfo, (gEnablePitch ? imageInfo->rowPitch : 0), (gEnablePitch ? imageInfo->slicePitch : 0), maxImageUseHostPtrBackingStore, &error); } else { - error = protImage.Create(context, image_read_write_flags, - imageInfo->format, imageInfo->width, - imageInfo->height, imageInfo->depth); + error = protImage.Create(context, imageInfo->type, + image_read_write_flags, imageInfo->format, + imageInfo->width, imageInfo->height, + imageInfo->depth, imageInfo->arraySize); } if (error != CL_SUCCESS) { - log_error("ERROR: Unable to create 3D image of size %d x %d x %d " + log_error("ERROR: Unable to create image of size %d x %d x %d x %d " "(pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, - (int)imageInfo->depth, (int)imageInfo->rowPitch, - (int)imageInfo->slicePitch, IGetErrorString(error)); + (int)imageInfo->depth, (int)imageInfo->arraySize, + (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, + IGetErrorString(error)); return error; } if (gTestMaxImages) @@ -238,18 +335,18 @@ int test_read_image(cl_context context, cl_command_queue queue, { // Don't use clEnqueueWriteImage; just use copy host ptr to get the data // in - unprotImage = create_image_3d( - context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, - imageInfo->format, imageInfo->width, imageInfo->height, - imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), + unprotImage = create_image_of_type( + context, image_read_write_flags | CL_MEM_COPY_HOST_PTR, imageInfo, + (gEnablePitch ? imageInfo->rowPitch : 0), (gEnablePitch ? 
imageInfo->slicePitch : 0), imageValues, &error); if (error != CL_SUCCESS) { - log_error("ERROR: Unable to create 3D image of size %d x %d x %d " + log_error("ERROR: Unable to create image of size %d x %d x %d x %d " "(pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, - (int)imageInfo->depth, (int)imageInfo->rowPitch, - (int)imageInfo->slicePitch, IGetErrorString(error)); + (int)imageInfo->depth, (int)imageInfo->arraySize, + (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, + IGetErrorString(error)); return error; } image = unprotImage; @@ -261,19 +358,19 @@ int test_read_image(cl_context context, cl_command_queue queue, // specified, so we just do the same thing either way if (!gTestMipmaps) { - unprotImage = create_image_3d( - context, image_read_write_flags | gMemFlagsToUse, - imageInfo->format, imageInfo->width, imageInfo->height, - imageInfo->depth, (gEnablePitch ? imageInfo->rowPitch : 0), + unprotImage = create_image_of_type( + context, image_read_write_flags | gMemFlagsToUse, imageInfo, + (gEnablePitch ? imageInfo->rowPitch : 0), (gEnablePitch ? 
imageInfo->slicePitch : 0), imageValues, &error); if (error != CL_SUCCESS) { - log_error("ERROR: Unable to create 3D image of size %d x %d x " - "%d (pitch %d, %d ) (%s)", + log_error("ERROR: Unable to create image of size %d x %d x " + "%d x %d (pitch %d, %d ) (%s)", (int)imageInfo->width, (int)imageInfo->height, - (int)imageInfo->depth, (int)imageInfo->rowPitch, - (int)imageInfo->slicePitch, IGetErrorString(error)); + (int)imageInfo->depth, (int)imageInfo->arraySize, + (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, + IGetErrorString(error)); return error; } image = unprotImage; @@ -281,10 +378,11 @@ int test_read_image(cl_context context, cl_command_queue queue, else { cl_image_desc image_desc = { 0 }; - image_desc.image_type = CL_MEM_OBJECT_IMAGE3D; + image_desc.image_type = imageInfo->type; image_desc.image_width = imageInfo->width; image_desc.image_height = imageInfo->height; image_desc.image_depth = imageInfo->depth; + image_desc.image_array_size = imageInfo->arraySize; image_desc.num_mip_levels = imageInfo->num_mip_levels; @@ -293,23 +391,24 @@ int test_read_image(cl_context context, cl_command_queue queue, imageInfo->format, &image_desc, NULL, &error); if (error != CL_SUCCESS) { - log_error("ERROR: Unable to create %d level mipmapped 3D image " - "of size %d x %d x %d (pitch %d, %d ) (%s)", + log_error("ERROR: Unable to create %d level mipmapped image " + "of size %d x %d x %d x %d (pitch %d, %d ) (%s)", (int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, (int)imageInfo->depth, - (int)imageInfo->rowPitch, (int)imageInfo->slicePitch, - IGetErrorString(error)); + (int)imageInfo->arraySize, (int)imageInfo->rowPitch, + (int)imageInfo->slicePitch, IGetErrorString(error)); return error; } image = unprotImage; } } + test_assert_error(nullptr != image, "Image creation failed"); + if (gMemFlagsToUse != CL_MEM_COPY_HOST_PTR) { size_t origin[4] = { 0, 0, 0, 0 }; - size_t region[3] = { imageInfo->width, imageInfo->height, - 
imageInfo->depth }; + size_t region[3] = { width_size, height_size, depth_size }; if (gDebugTrace) log_info(" - Writing image...\n"); @@ -324,10 +423,10 @@ int test_read_image(cl_context context, cl_command_queue queue, if (error != CL_SUCCESS) { - log_error("ERROR: Unable to write to 3D image of size %d x %d " - "x %d \n", + log_error("ERROR: Unable to write to image of size %d x %d " + "x %d x %d\n", (int)imageInfo->width, (int)imageInfo->height, - (int)imageInfo->depth); + (int)imageInfo->depth, (int)imageInfo->arraySize); return error; } } @@ -339,17 +438,15 @@ int test_read_image(cl_context context, cl_command_queue queue, { origin[3] = i; error = clEnqueueWriteImage( - queue, image, CL_TRUE, origin, region, - /*gEnablePitch ? imageInfo->rowPitch :*/ 0, - /*gEnablePitch ? imageInfo->slicePitch :*/ 0, + queue, image, CL_TRUE, origin, region, 0, 0, ((char *)imageValues + nextLevelOffset), 0, NULL, NULL); if (error != CL_SUCCESS) { - log_error("ERROR: Unable to write to %d level mipmapped 3D " - "image of size %d x %d x %d\n", + log_error("ERROR: Unable to write to %d level mipmapped " + "image of size %d x %d x %d x %d\n", (int)imageInfo->num_mip_levels, (int)imageInfo->width, (int)imageInfo->height, - (int)imageInfo->depth); + (int)imageInfo->arraySize, (int)imageInfo->depth); return error; } nextLevelOffset += region[0] * region[1] * region[2] @@ -362,26 +459,21 @@ int test_read_image(cl_context context, cl_command_queue queue, } } - xOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->depth, - xOffsetValues, &error); + xOffsets = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(cl_float) * image_size, xOffsetValues, &error); test_error(error, "Unable to create x offset buffer"); - yOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->depth, - yOffsetValues, &error); + yOffsets = + 
clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(cl_float) * image_size, yOffsetValues, &error); test_error(error, "Unable to create y offset buffer"); - zOffsets = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, - sizeof(cl_float) * imageInfo->width - * imageInfo->height * imageInfo->depth, - zOffsetValues, &error); + zOffsets = + clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, + sizeof(cl_float) * image_size, zOffsetValues, &error); test_error(error, "Unable to create y offset buffer"); - results = - clCreateBuffer(context, CL_MEM_READ_WRITE, - get_explicit_type_size(outputType) * 4 * imageInfo->width - * imageInfo->height * imageInfo->depth, - NULL, &error); + results = clCreateBuffer( + context, CL_MEM_READ_WRITE, + get_explicit_type_size(outputType) * 4 * image_size, NULL, &error); test_error(error, "Unable to create result buffer"); // Create sampler to use @@ -444,16 +536,19 @@ int test_read_image(cl_context context, cl_command_queue queue, } int nextLevelOffset = 0; - size_t width_lod = imageInfo->width, height_lod = imageInfo->height, - depth_lod = imageInfo->depth; + size_t width_lod = width_size, height_lod = height_size, + depth_lod = depth_size; // Loop over all mipmap levels, if we are testing mipmapped images. 
for (int lod = 0; (gTestMipmaps && lod < imageInfo->num_mip_levels) || (!gTestMipmaps && lod < 1); lod++) { - size_t resultValuesSize = width_lod * height_lod * depth_lod - * get_explicit_type_size(outputType) * 4; + size_t image_lod_size = get_image_num_pixels( + imageInfo, width_lod, height_lod, depth_lod, imageInfo->arraySize); + test_assert_error(0 != image_lod_size, "Invalid image size"); + size_t resultValuesSize = + image_lod_size * get_explicit_type_size(outputType) * 4; BufferOwningPtr resultValues(malloc(resultValuesSize)); float lod_float = (float)lod; if (gTestMipmaps) @@ -469,30 +564,25 @@ int test_read_image(cl_context context, cl_command_queue queue, float offset = float_offsets[q % float_offset_count]; // Init the coordinates - InitFloatCoordsCommon(imageInfo, imageSampler, xOffsetValues, - yOffsetValues, zOffsetValues, - q >= float_offset_count ? -offset : offset, - q >= float_offset_count ? offset : -offset, - q >= float_offset_count ? -offset : offset, - imageSampler->normalized_coords, d, lod); - - error = - clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0, - sizeof(cl_float) * imageInfo->height - * imageInfo->width * imageInfo->depth, - xOffsetValues, 0, NULL, NULL); + error = InitFloatCoordsCommon( + imageInfo, imageSampler, xOffsetValues, yOffsetValues, + zOffsetValues, q >= float_offset_count ? -offset : offset, + q >= float_offset_count ? offset : -offset, + q >= float_offset_count ? 
-offset : offset, + imageSampler->normalized_coords, d, lod); + test_error(error, "Unable to initialise coordinates"); + + error = clEnqueueWriteBuffer(queue, xOffsets, CL_TRUE, 0, + sizeof(cl_float) * image_size, + xOffsetValues, 0, NULL, NULL); test_error(error, "Unable to write x offsets"); - error = - clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0, - sizeof(cl_float) * imageInfo->height - * imageInfo->width * imageInfo->depth, - yOffsetValues, 0, NULL, NULL); + error = clEnqueueWriteBuffer(queue, yOffsets, CL_TRUE, 0, + sizeof(cl_float) * image_size, + yOffsetValues, 0, NULL, NULL); test_error(error, "Unable to write y offsets"); - error = - clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0, - sizeof(cl_float) * imageInfo->height - * imageInfo->width * imageInfo->depth, - zOffsetValues, 0, NULL, NULL); + error = clEnqueueWriteBuffer(queue, zOffsets, CL_TRUE, 0, + sizeof(cl_float) * image_size, + zOffsetValues, 0, NULL, NULL); test_error(error, "Unable to write z offsets"); @@ -511,11 +601,10 @@ int test_read_image(cl_context context, cl_command_queue queue, test_error(error, "Unable to run kernel"); // Get results - error = clEnqueueReadBuffer(queue, results, CL_TRUE, 0, - width_lod * height_lod * depth_lod - * get_explicit_type_size(outputType) - * 4, - resultValues, 0, NULL, NULL); + error = clEnqueueReadBuffer( + queue, results, CL_TRUE, 0, + image_lod_size * get_explicit_type_size(outputType) * 4, + resultValues, 0, NULL, NULL); test_error(error, "Unable to read results from kernel"); if (gDebugTrace) log_info(" results read\n"); @@ -1540,8 +1629,14 @@ int test_read_image(cl_context context, cl_command_queue queue, nextLevelOffset += width_lod * height_lod * depth_lod * get_pixel_size(imageInfo->format); width_lod = (width_lod >> 1) ? (width_lod >> 1) : 1; - height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1; - depth_lod = (depth_lod >> 1) ? 
(depth_lod >> 1) : 1; + if (imageInfo->type != CL_MEM_OBJECT_IMAGE1D_ARRAY) + { + height_lod = (height_lod >> 1) ? (height_lod >> 1) : 1; + } + if (imageInfo->type != CL_MEM_OBJECT_IMAGE2D_ARRAY) + { + depth_lod = (depth_lod >> 1) ? (depth_lod >> 1) : 1; + } } } diff --git a/test_conformance/images/kernel_read_write/test_common.h b/test_conformance/images/kernel_read_write/test_common.h index 656c41f4..fc95bee2 100644 --- a/test_conformance/images/kernel_read_write/test_common.h +++ b/test_conformance/images/kernel_read_write/test_common.h @@ -42,12 +42,8 @@ extern int test_read_image(cl_context context, cl_command_queue queue, bool useFloatCoords, ExplicitType outputType, MTdata d); -extern void InitFloatCoordsCommon(image_descriptor *imageInfo, - image_sampler_data *imageSampler, - float *xOffsets, float *yOffsets, - float *zOffsets, float xfract, float yfract, - float zfract, int normalized_coords, MTdata d, - int lod); +extern bool get_image_dimensions(image_descriptor *imageInfo, size_t &width, + size_t &height, size_t &depth); template int determine_validation_error_offset( @@ -63,8 +59,12 @@ int determine_validation_error_offset( bool clampingErr = false, clamped = false, otherClampingBug = false; int clampedX, clampedY, clampedZ; - size_t imageWidth = imageInfo->width, imageHeight = imageInfo->height, - imageDepth = imageInfo->depth; + size_t imageWidth, imageHeight, imageDepth; + if (get_image_dimensions(imageInfo, imageWidth, imageHeight, imageDepth)) + { + log_error("ERROR: invalid image dimensions"); + return TEST_FAIL; + } clamped = get_integer_coords_offset(x, y, z, xAddressOffset, yAddressOffset, zAddressOffset, imageWidth, imageHeight, @@ -147,82 +147,67 @@ int determine_validation_error_offset( } if (!clampingErr) { - /* if( clamped && ( (int)x + (int)xOffsetValues[ j ] < 0 || - (int)y + (int)yOffsetValues[ j ] < 0 ) ) - { - log_error( "NEGATIVE COORDINATE ERROR\n" ); - return -1; - } - */ - if (true) // gExtraValidateInfo ) + if (printAsFloat) { - 
if (printAsFloat) - { - log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " - "validate!\n\tExpected (%g,%g,%g,%g),\n\t got " - "(%g,%g,%g,%g), error of %g\n", - j, x, x, y, y, z, z, (float)expected[0], - (float)expected[1], (float)expected[2], - (float)expected[3], (float)resultPtr[0], - (float)resultPtr[1], (float)resultPtr[2], - (float)resultPtr[3], error); - } - else - { - log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " - "validate!\n\tExpected (%x,%x,%x,%x),\n\t got " - "(%x,%x,%x,%x)\n", - j, x, x, y, y, z, z, (int)expected[0], - (int)expected[1], (int)expected[2], (int)expected[3], - (int)resultPtr[0], (int)resultPtr[1], - (int)resultPtr[2], (int)resultPtr[3]); - } - log_error( - "Integer coords resolve to %d,%d,%d with img size %d,%d,%d\n", - clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, - (int)imageDepth); + log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " + "validate!\n\tExpected (%g,%g,%g,%g),\n\t got " + "(%g,%g,%g,%g), error of %g\n", + j, x, x, y, y, z, z, (float)expected[0], + (float)expected[1], (float)expected[2], + (float)expected[3], (float)resultPtr[0], + (float)resultPtr[1], (float)resultPtr[2], + (float)resultPtr[3], error); + } + else + { + log_error("Sample %ld: coord {%f(%a),%f(%a),%f(%a)} did not " + "validate!\n\tExpected (%x,%x,%x,%x),\n\t got " + "(%x,%x,%x,%x)\n", + j, x, x, y, y, z, z, (int)expected[0], (int)expected[1], + (int)expected[2], (int)expected[3], (int)resultPtr[0], + (int)resultPtr[1], (int)resultPtr[2], (int)resultPtr[3]); + } + log_error( + "Integer coords resolve to %d,%d,%d with img size %d,%d,%d\n", + clampedX, clampedY, clampedZ, (int)imageWidth, (int)imageHeight, + (int)imageDepth); - if (printAsFloat && gExtraValidateInfo) + if (printAsFloat && gExtraValidateInfo) + { + log_error("\nNearby values:\n"); + for (int zOff = -1; zOff <= 1; zOff++) { - log_error("\nNearby values:\n"); - for (int zOff = -1; zOff <= 1; zOff++) + for (int yOff = -1; yOff <= 1; yOff++) 
{ - for (int yOff = -1; yOff <= 1; yOff++) - { - float top[4], real[4], bot[4]; - read_image_pixel_float(imagePtr, imageInfo, - clampedX - 1, clampedY + yOff, - clampedZ + zOff, top); - read_image_pixel_float(imagePtr, imageInfo, clampedX, - clampedY + yOff, clampedZ + zOff, - real); - read_image_pixel_float(imagePtr, imageInfo, - clampedX + 1, clampedY + yOff, - clampedZ + zOff, bot); - log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2], - top[3]); - log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2], - real[3]); - log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2], - bot[3]); - } + float top[4], real[4], bot[4]; + read_image_pixel_float(imagePtr, imageInfo, clampedX - 1, + clampedY + yOff, clampedZ + zOff, + top); + read_image_pixel_float(imagePtr, imageInfo, clampedX, + clampedY + yOff, clampedZ + zOff, + real); + read_image_pixel_float(imagePtr, imageInfo, clampedX + 1, + clampedY + yOff, clampedZ + zOff, + bot); + log_error("\t(%g,%g,%g,%g)", top[0], top[1], top[2], + top[3]); + log_error(" (%g,%g,%g,%g)", real[0], real[1], real[2], + real[3]); + log_error(" (%g,%g,%g,%g)\n", bot[0], bot[1], bot[2], + bot[3]); } } - // } - // else - // log_error( "\n" ); - if (imageSampler->filter_mode != CL_FILTER_LINEAR) - { - if (found) - log_error( - "\tValue really found in image at %d,%d,%d (%s)\n", - actualX, actualY, actualZ, - (found > 1) ? "NOT unique!!" : "unique"); - else - log_error("\tValue not actually found in image\n"); - } - log_error("\n"); } + if (imageSampler->filter_mode != CL_FILTER_LINEAR) + { + if (found) + log_error("\tValue really found in image at %d,%d,%d (%s)\n", + actualX, actualY, actualZ, + (found > 1) ? "NOT unique!!" 
: "unique"); + else + log_error("\tValue not actually found in image\n"); + } + log_error("\n"); numClamped = -1; // We force the clamped counter to never work if ((--numTries) == 0) return -1; -- cgit v1.2.3 From 30500fba06973115cab6333d96d2b75d53476daa Mon Sep 17 00:00:00 2001 From: Sreelakshmi Haridas Maruthur Date: Tue, 27 Sep 2022 10:28:57 -0600 Subject: SVM: Fix memory allocation size. (#1514) * SVM: Fix memory allocation size. 9ad48998 generally made memory allocation and mapping consistent with a size of size_t. Apply that fix to the final two allocations. * check-format fixes Co-authored-by: spauls --- test_conformance/SVM/test_cross_buffer_pointers.cpp | 3 ++- test_conformance/SVM/test_shared_sub_buffers.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test_conformance/SVM/test_cross_buffer_pointers.cpp b/test_conformance/SVM/test_cross_buffer_pointers.cpp index c1caebb9..2baa7ad7 100644 --- a/test_conformance/SVM/test_cross_buffer_pointers.cpp +++ b/test_conformance/SVM/test_cross_buffer_pointers.cpp @@ -162,7 +162,8 @@ int test_svm_cross_buffer_pointers_coarse_grain(cl_device_id deviceID, cl_contex test_error(error, "clCreateBuffer failed."); // this buffer holds the index into the nodes buffer that is used for node allocation - clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(size_t), NULL, &error); test_error(error, "clCreateBuffer failed."); // this buffer holds the count of correct nodes which is computed by the verify kernel. 
diff --git a/test_conformance/SVM/test_shared_sub_buffers.cpp b/test_conformance/SVM/test_shared_sub_buffers.cpp index a79484c9..2532886e 100644 --- a/test_conformance/SVM/test_shared_sub_buffers.cpp +++ b/test_conformance/SVM/test_shared_sub_buffers.cpp @@ -182,7 +182,8 @@ int test_svm_shared_sub_buffers(cl_device_id deviceID, cl_context context2, cl_c // this buffer holds the index into the nodes buffer that is used for node allocation - clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_int), NULL, &error); + clMemWrapper allocator = clCreateBuffer(context, CL_MEM_READ_WRITE, + sizeof(size_t), NULL, &error); test_error(error, "clCreateBuffer failed."); // this buffer holds the count of correct nodes which is computed by the verify kernel. -- cgit v1.2.3 From 9b21e9f06b88e7ce96b76b0e94c6dfef644ac1ee Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 27 Sep 2022 17:29:58 +0100 Subject: [NFC] Avoid mixing signed and unsigned in subhelpers run (#1505) Fix a `-Wsign-compare` warning in the `run()` function, which resulted in many repeated warnings when compiling with `-Wall` due to the many template instantiations. Both `clGetKernelSubGroupInfo` queries return a `size_t`, so it is unclear why the results of these queries were being cast to `int`. The `dynsc` uses don't seem to work with negative values, so make the field unsigned. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/subgroups/subhelpers.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/test_conformance/subgroups/subhelpers.h b/test_conformance/subgroups/subhelpers.h index 0944ffb3..0a2c3903 100644 --- a/test_conformance/subgroups/subhelpers.h +++ b/test_conformance/subgroups/subhelpers.h @@ -72,7 +72,7 @@ struct WorkGroupParams size_t subgroup_size; cl_uint cluster_size; bs128 work_items_mask; - int dynsc; + size_t dynsc; bool use_core_subgroups; std::vector all_work_item_masks; int divergence_mask_arg; @@ -1495,7 +1495,7 @@ template struct test { size_t tmp; cl_int error; - int subgroup_size, num_subgroups; + size_t subgroup_size, num_subgroups; size_t global = test_params.global_workgroup_size; size_t local = test_params.local_workgroup_size; clProgramWrapper program; @@ -1580,7 +1580,7 @@ template struct test return TEST_FAIL; } - subgroup_size = (int)tmp; + subgroup_size = tmp; error = clGetKernelSubGroupInfo_ptr( kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, @@ -1593,11 +1593,11 @@ template struct test return TEST_FAIL; } - num_subgroups = (int)tmp; + num_subgroups = tmp; // Make sure the number of sub groups is what we expect if (num_subgroups != (local + subgroup_size - 1) / subgroup_size) { - log_error("ERROR: unexpected number of subgroups (%d) returned\n", + log_error("ERROR: unexpected number of subgroups (%zu) returned\n", num_subgroups); return TEST_FAIL; } @@ -1606,13 +1606,12 @@ template struct test std::vector odata; size_t input_array_size = global; size_t output_array_size = global; - int dynscl = test_params.dynsc; + size_t dynscl = test_params.dynsc; if (dynscl != 0) { - input_array_size = - (int)global / (int)local * num_subgroups * dynscl; - output_array_size = (int)global / (int)local * dynscl; + input_array_size = global / local * num_subgroups * dynscl; + output_array_size = global / local * dynscl; } 
idata.resize(input_array_size); -- cgit v1.2.3 From 9bf6486352bf4c87a49ecb212ae71f96c293c26f Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 27 Sep 2022 17:32:23 +0100 Subject: [NFC] clang-format test_atomics (#1516) Add some clang-format off/on comments to keep lists and kernel code readable. Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/atomics/main.cpp | 7 +- test_conformance/atomics/procs.h | 49 +- test_conformance/atomics/testBase.h | 5 +- test_conformance/atomics/test_atomics.cpp | 1255 ++++++++++++++--------- test_conformance/atomics/test_indexed_cases.cpp | 507 +++++---- 5 files changed, 1143 insertions(+), 680 deletions(-) diff --git a/test_conformance/atomics/main.cpp b/test_conformance/atomics/main.cpp index afdea376..987d6bfa 100644 --- a/test_conformance/atomics/main.cpp +++ b/test_conformance/atomics/main.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -24,6 +24,7 @@ #include #endif +// clang-format off test_definition test_list[] = { ADD_TEST( atomic_add ), ADD_TEST( atomic_sub ), @@ -40,11 +41,11 @@ test_definition test_list[] = { ADD_TEST( atomic_add_index ), ADD_TEST( atomic_add_index_bin ), }; +// clang-format on -const int test_num = ARRAY_SIZE( test_list ); +const int test_num = ARRAY_SIZE(test_list); int main(int argc, const char *argv[]) { return runTestHarness(argc, argv, test_num, test_list, false, 0); } - diff --git a/test_conformance/atomics/procs.h b/test_conformance/atomics/procs.h index bf053f25..fa85aad5 100644 --- a/test_conformance/atomics/procs.h +++ b/test_conformance/atomics/procs.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. 
-// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -18,22 +18,35 @@ #include "harness/threadTesting.h" #include "harness/typeWrappers.h" -extern int create_program_and_kernel(const char *source, const char *kernel_name, cl_program *program_ret, cl_kernel *kernel_ret); - -extern int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); - -extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); -extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements); - +extern int create_program_and_kernel(const char *source, + const char *kernel_name, 
+ cl_program *program_ret, + cl_kernel *kernel_ret); +extern int test_atomic_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_xchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_inc(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_dec(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_add_index(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); +extern int test_atomic_add_index_bin(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements); diff --git a/test_conformance/atomics/testBase.h b/test_conformance/atomics/testBase.h index ba67d140..22bce1d2 100644 --- a/test_conformance/atomics/testBase.h +++ b/test_conformance/atomics/testBase.h @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -26,6 +26,3 @@ #include "procs.h" #endif // _testBase_h - - - diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp index c0c01363..31d08500 100644 --- a/test_conformance/atomics/test_atomics.cpp +++ b/test_conformance/atomics/test_atomics.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -22,7 +22,7 @@ #define INT_TEST_VALUE 402258822 #define LONG_TEST_VALUE 515154531254381446LL - +// clang-format off const char *atomic_global_pattern[] = { "__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n" "{\n" @@ -36,19 +36,20 @@ const char *atomic_local_pattern[] = { "__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n" "{\n" " int tid = get_global_id(0);\n" - " int dstItemIdx;\n" + " int dstItemIdx;\n" "\n" " // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n" - " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" + " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" " destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n" " barrier( CLK_LOCAL_MEM_FENCE );\n" "\n" , " barrier( CLK_LOCAL_MEM_FENCE );\n" " // Finally, write out the last value. 
Again, we're synced, so everyone will be writing the same value\n" - " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" + " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n" " finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n" "}\n" }; +// clang-format on #define TEST_COUNT 128 * 1024 @@ -56,41 +57,48 @@ const char *atomic_local_pattern[] = { struct TestFns { - cl_int mIntStartValue; - cl_long mLongStartValue; + cl_int mIntStartValue; + cl_long mLongStartValue; - size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType ); + size_t (*NumResultsFn)(size_t threadSize, ExplicitType dataType); // Integer versions - cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue ); - void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d ); - bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue ); + cl_int (*ExpectedValueIntFn)(size_t size, cl_int *startRefValues, + size_t whichDestValue); + void (*GenerateRefsIntFn)(size_t size, cl_int *startRefValues, MTdata d); + bool (*VerifyRefsIntFn)(size_t size, cl_int *refValues, cl_int finalValue); // Long versions - cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue ); - void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d ); - bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue ); + cl_long (*ExpectedValueLongFn)(size_t size, cl_long *startRefValues, + size_t whichDestValue); + void (*GenerateRefsLongFn)(size_t size, cl_long *startRefValues, MTdata d); + bool (*VerifyRefsLongFn)(size_t size, cl_long *refValues, + cl_long finalValue); // Float versions - cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue ); - void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d ); - bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue ); + cl_float 
(*ExpectedValueFloatFn)(size_t size, cl_float *startRefValues, + size_t whichDestValue); + void (*GenerateRefsFloatFn)(size_t size, cl_float *startRefValues, + MTdata d); + bool (*VerifyRefsFloatFn)(size_t size, cl_float *refValues, + cl_float finalValue); }; -bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType ) +bool check_atomic_support(cl_device_id device, bool extended, bool isLocal, + ExplicitType dataType) { + // clang-format off const char *extensionNames[8] = { "cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics", "cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics", "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics", "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics" // this line intended to be the same as the last one }; + // clang-format on size_t index = 0; - if( extended ) - index += 1; - if( isLocal ) - index += 2; + if (extended) index += 1; + if (isLocal) index += 2; Version version = get_device_cl_version(device); @@ -98,26 +106,28 @@ bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, Exp { case kInt: case kUInt: - if( version >= Version(1,1) ) - return 1; + if (version >= Version(1, 1)) return 1; break; case kLong: - case kULong: - index += 4; - break; - case kFloat: // this has to stay separate since the float atomics arent in the 1.0 extensions - return version >= Version(1,1); + case kULong: index += 4; break; + case kFloat: // this has to stay separate since the float atomics arent + // in the 1.0 extensions + return version >= Version(1, 1); default: - log_error( "ERROR: Unsupported data type (%d) in check_atomic_support\n", dataType ); + log_error( + "ERROR: Unsupported data type (%d) in check_atomic_support\n", + dataType); return 0; } - return is_extension_available( device, extensionNames[index] ); + return is_extension_available(device, extensionNames[index]); } -int test_atomic_function(cl_device_id 
deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore, - TestFns testFns, - bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize ) +int test_atomic_function(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + const char *programCore, TestFns testFns, + bool extended, bool isLocal, ExplicitType dataType, + bool matchGroupSize) { clProgramWrapper program; clKernelWrapper kernel; @@ -127,55 +137,65 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q void *refValues, *startRefValues; size_t threadSize, groupSize; const char *programLines[4]; - char pragma[ 512 ]; - char programHeader[ 512 ]; + char pragma[512]; + char programHeader[512]; MTdata d; - size_t typeSize = get_explicit_type_size( dataType ); + size_t typeSize = get_explicit_type_size(dataType); // Verify we can run first - bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt ); - if( !check_atomic_support( deviceID, extended, isLocal, dataType ) ) + bool isUnsigned = (dataType == kULong) || (dataType == kUInt); + if (!check_atomic_support(deviceID, extended, isLocal, dataType)) { - // Only print for the signed (unsigned comes right after, and if signed isn't supported, unsigned isn't either) - if( dataType == kFloat ) - log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" ); - else if( !isUnsigned ) - log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 ); + // Only print for the signed (unsigned comes right after, and if signed + // isn't supported, unsigned isn't either) + if (dataType == kFloat) + log_info("\t%s float not supported\n", + isLocal ? "Local" : "Global"); + else if (!isUnsigned) + log_info("\t%s %sint%d not supported\n", + isLocal ? "Local" : "Global", isUnsigned ? 
"u" : "", + (int)typeSize * 8); // Since we don't support the operation, they implicitly pass return 0; } else { - if( dataType == kFloat ) - log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" ); + if (dataType == kFloat) + log_info("\t%s float%s...", isLocal ? "local" : "global", + isLocal ? " " : ""); else - log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "", - (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? " " : "" ); + log_info("\t%s %sint%d%s%s...", isLocal ? "local" : "global", + isUnsigned ? "u" : "", (int)typeSize * 8, + isUnsigned ? "" : " ", isLocal ? " " : ""); } //// Set up the kernel code // Create the pragma line for this kernel - bool isLong = ( dataType == kLong || dataType == kULong ); - sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n", - isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32", - extended ? "extended" : "base" ); + bool isLong = (dataType == kLong || dataType == kULong); + sprintf(pragma, + "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n", + isLong ? "" : (isLocal ? "_local" : "_global"), + isLong ? "64" : "32", extended ? "extended" : "base"); // Now create the program header - const char *typeName = get_explicit_type_name( dataType ); - if( isLocal ) - sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName ); + const char *typeName = get_explicit_type_name(dataType); + if (isLocal) + sprintf(programHeader, atomic_local_pattern[0], typeName, typeName, + typeName); else - sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName ); + sprintf(programHeader, atomic_global_pattern[0], typeName, typeName); // Set up our entire program now - programLines[ 0 ] = pragma; - programLines[ 1 ] = programHeader; - programLines[ 2 ] = programCore; - programLines[ 3 ] = ( isLocal ) ? 
atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ]; - - if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) ) + programLines[0] = pragma; + programLines[1] = programHeader; + programLines[2] = programCore; + programLines[3] = + (isLocal) ? atomic_local_pattern[1] : atomic_global_pattern[1]; + + if (create_single_kernel_helper(context, &program, &kernel, 4, programLines, + "test_atomic_fn")) { return -1; } @@ -183,29 +203,37 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q //// Set up to actually run threadSize = num_elements; - error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize ); - test_error( error, "Unable to get thread group max size" ); + error = + get_max_common_work_group_size(context, kernel, threadSize, &groupSize); + test_error(error, "Unable to get thread group max size"); - if( matchGroupSize ) + if (matchGroupSize) // HACK because xchg and cmpxchg apparently are limited by hardware threadSize = groupSize; - if( isLocal ) + if (isLocal) { - size_t maxSizes[3] = {0, 0, 0}; - error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3*sizeof(size_t), maxSizes, 0); - test_error( error, "Unable to obtain max work item sizes for the device" ); + size_t maxSizes[3] = { 0, 0, 0 }; + error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, + 3 * sizeof(size_t), maxSizes, 0); + test_error(error, + "Unable to obtain max work item sizes for the device"); size_t workSize; - error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL ); - test_error( error, "Unable to obtain max work group size for device and kernel combo" ); + error = clGetKernelWorkGroupInfo(kernel, deviceID, + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(workSize), &workSize, NULL); + test_error( + error, + "Unable to obtain max work group size for device and kernel combo"); // Limit workSize to avoid extremely large local 
buffer size and slow // run. if (workSize > 65536) workSize = 65536; - // "workSize" is limited to that of the first dimension as only a 1DRange is executed. - if( maxSizes[0] < workSize ) + // "workSize" is limited to that of the first dimension as only a + // 1DRange is executed. + if (maxSizes[0] < workSize) { workSize = maxSizes[0]; } @@ -214,38 +242,43 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q } - log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize ); + log_info("\t(thread count %d, group size %d)\n", (int)threadSize, + (int)groupSize); - refValues = (cl_int *)malloc( typeSize * threadSize ); + refValues = (cl_int *)malloc(typeSize * threadSize); - if( testFns.GenerateRefsIntFn != NULL ) + if (testFns.GenerateRefsIntFn != NULL) { // We have a ref generator provided - d = init_genrand( gRandomSeed ); - startRefValues = malloc( typeSize * threadSize ); - if( typeSize == 4 ) - testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d ); + d = init_genrand(gRandomSeed); + startRefValues = malloc(typeSize * threadSize); + if (typeSize == 4) + testFns.GenerateRefsIntFn(threadSize, (cl_int *)startRefValues, d); else - testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d ); + testFns.GenerateRefsLongFn(threadSize, (cl_long *)startRefValues, + d); free_mtdata(d); d = NULL; } else startRefValues = NULL; - // If we're given a num_results function, we need to determine how many result objects we need. If - // we don't have it, we assume it's just 1 - size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1; + // If we're given a num_results function, we need to determine how many + // result objects we need. If we don't have it, we assume it's just 1 + size_t numDestItems = (testFns.NumResultsFn != NULL) + ? 
testFns.NumResultsFn(threadSize, dataType) + : 1; - char * destItems = new char[ typeSize * numDestItems ]; - if( destItems == NULL ) + char *destItems = new char[typeSize * numDestItems]; + if (destItems == NULL) { - log_error( "ERROR: Unable to allocate memory!\n" ); + log_error("ERROR: Unable to allocate memory!\n"); return -1; } - void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue; - for( size_t i = 0; i < numDestItems; i++ ) - memcpy( destItems + i * typeSize, startValue, typeSize ); + void *startValue = (typeSize == 4) ? (void *)&testFns.mIntStartValue + : (void *)&testFns.mLongStartValue; + for (size_t i = 0; i < numDestItems; i++) + memcpy(destItems + i * typeSize, startValue, typeSize); streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, typeSize * numDestItems, destItems, NULL); @@ -265,82 +298,96 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q } /* Set the arguments */ - error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] ); - test_error( error, "Unable to set indexed kernel arguments" ); - error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] ); - test_error( error, "Unable to set indexed kernel arguments" ); + error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]); + test_error(error, "Unable to set indexed kernel arguments"); + error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]); + test_error(error, "Unable to set indexed kernel arguments"); - if( isLocal ) + if (isLocal) { - error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL ); - test_error( error, "Unable to set indexed local kernel argument" ); + error = clSetKernelArg(kernel, 2, typeSize * numDestItems, NULL); + test_error(error, "Unable to set indexed local kernel argument"); cl_int numDestItemsInt = (cl_int)numDestItems; - error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt ); - test_error( error, "Unable to 
set indexed kernel argument" ); + error = clSetKernelArg(kernel, 3, sizeof(cl_int), &numDestItemsInt); + test_error(error, "Unable to set indexed kernel argument"); } /* Run the kernel */ threads[0] = threadSize; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); - - error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL ); - test_error( error, "Unable to read result value!" ); - - error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL ); - test_error( error, "Unable to read reference values!" ); - - // If we have an expectedFn, then we need to generate a final value to compare against. If we don't - // have one, it's because we're comparing ref values only - if( testFns.ExpectedValueIntFn != NULL ) + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, &groupSize, + 0, NULL, NULL); + test_error(error, "Unable to execute test kernel"); + + error = + clEnqueueReadBuffer(queue, streams[0], true, 0, typeSize * numDestItems, + destItems, 0, NULL, NULL); + test_error(error, "Unable to read result value!"); + + error = + clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize * threadSize, + refValues, 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); + + // If we have an expectedFn, then we need to generate a final value to + // compare against. 
If we don't have one, it's because we're comparing ref + // values only + if (testFns.ExpectedValueIntFn != NULL) { - for( size_t i = 0; i < numDestItems; i++ ) + for (size_t i = 0; i < numDestItems; i++) { - char expected[ 8 ]; + char expected[8]; cl_int intVal; cl_long longVal; - if( typeSize == 4 ) + if (typeSize == 4) { // Int version - intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i ); - memcpy( expected, &intVal, sizeof( intVal ) ); + intVal = testFns.ExpectedValueIntFn( + threadSize, (cl_int *)startRefValues, i); + memcpy(expected, &intVal, sizeof(intVal)); } else { // Long version - longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i ); - memcpy( expected, &longVal, sizeof( longVal ) ); + longVal = testFns.ExpectedValueLongFn( + threadSize, (cl_long *)startRefValues, i); + memcpy(expected, &longVal, sizeof(longVal)); } - if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 ) + if (memcmp(expected, destItems + i * typeSize, typeSize) != 0) { - if( typeSize == 4 ) + if (typeSize == 4) { - cl_int *outValue = (cl_int *)( destItems + i * typeSize ); - log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue ); + cl_int *outValue = (cl_int *)(destItems + i * typeSize); + log_error("ERROR: Result %ld from kernel does not " + "validate! 
(should be %d, was %d)\n", + i, intVal, *outValue); cl_int *startRefs = (cl_int *)startRefValues; cl_int *refs = (cl_int *)refValues; - for( i = 0; i < threadSize; i++ ) + for (i = 0; i < threadSize; i++) { - if( startRefs != NULL ) - log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] ); + if (startRefs != NULL) + log_info(" --- %ld - %d --- %d\n", i, startRefs[i], + refs[i]); else - log_info( " --- %ld --- %d\n", i, refs[i] ); + log_info(" --- %ld --- %d\n", i, refs[i]); } } else { - cl_long *outValue = (cl_long *)( destItems + i * typeSize ); - log_error( "ERROR: Result %ld from kernel does not validate! (should be %lld, was %lld)\n", i, longVal, *outValue ); + cl_long *outValue = (cl_long *)(destItems + i * typeSize); + log_error("ERROR: Result %ld from kernel does not " + "validate! (should be %lld, was %lld)\n", + i, longVal, *outValue); cl_long *startRefs = (cl_long *)startRefValues; cl_long *refs = (cl_long *)refValues; - for( i = 0; i < threadSize; i++ ) + for (i = 0; i < threadSize; i++) { - if( startRefs != NULL ) - log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] ); + if (startRefs != NULL) + log_info(" --- %ld - %lld --- %lld\n", i, + startRefs[i], refs[i]); else - log_info( " --- %ld --- %lld\n", i, refs[i] ); + log_info(" --- %ld --- %lld\n", i, refs[i]); } } return -1; @@ -348,104 +395,140 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_q } } - if( testFns.VerifyRefsIntFn != NULL ) + if (testFns.VerifyRefsIntFn != NULL) { /* Use the verify function to also check the results */ - if( dataType == kFloat ) + if (dataType == kFloat) { cl_float *outValue = (cl_float *)destItems; - if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) != 0 ) + if (!testFns.VerifyRefsFloatFn(threadSize, (cl_float *)refValues, + *outValue) + != 0) { - log_error( "ERROR: Reference values did not validate!\n" ); + log_error("ERROR: Reference values did not validate!\n"); return -1; } } - else 
if( typeSize == 4 ) + else if (typeSize == 4) { cl_int *outValue = (cl_int *)destItems; - if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) != 0 ) + if (!testFns.VerifyRefsIntFn(threadSize, (cl_int *)refValues, + *outValue) + != 0) { - log_error( "ERROR: Reference values did not validate!\n" ); + log_error("ERROR: Reference values did not validate!\n"); return -1; } } else { cl_long *outValue = (cl_long *)destItems; - if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) != 0 ) + if (!testFns.VerifyRefsLongFn(threadSize, (cl_long *)refValues, + *outValue) + != 0) { - log_error( "ERROR: Reference values did not validate!\n" ); + log_error("ERROR: Reference values did not validate!\n"); return -1; } } } - else if( testFns.ExpectedValueIntFn == NULL ) + else if (testFns.ExpectedValueIntFn == NULL) { - log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" ); + log_error("ERROR: Test doesn't check total or refs; no values are " + "verified!\n"); return -1; } /* Re-write the starting value */ - for( size_t i = 0; i < numDestItems; i++ ) - memcpy( destItems + i * typeSize, startValue, typeSize ); - error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL ); - test_error( error, "Unable to write starting values!" 
); - - /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */ + for (size_t i = 0; i < numDestItems; i++) + memcpy(destItems + i * typeSize, startValue, typeSize); + error = + clEnqueueWriteBuffer(queue, streams[0], true, 0, + typeSize * numDestItems, destItems, 0, NULL, NULL); + test_error(error, "Unable to write starting values!"); + + /* Run the kernel once for a single thread, so we can verify that the + * returned value is the original one */ threads[0] = 1; - error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL ); - test_error( error, "Unable to execute test kernel" ); + error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads, threads, 0, + NULL, NULL); + test_error(error, "Unable to execute test kernel"); - error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL ); - test_error( error, "Unable to read reference values!" ); + error = clEnqueueReadBuffer(queue, streams[1], true, 0, typeSize, refValues, + 0, NULL, NULL); + test_error(error, "Unable to read reference values!"); - if( memcmp( refValues, destItems, typeSize ) != 0 ) + if (memcmp(refValues, destItems, typeSize) != 0) { - if( typeSize == 4 ) + if (typeSize == 4) { cl_int *s = (cl_int *)destItems; cl_int *r = (cl_int *)refValues; - log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value " - " (should have been %d, returned %d)!\n", *s, *r ); + log_error("ERROR: atomic function operated correctly but did NOT " + "return correct 'old' value " + " (should have been %d, returned %d)!\n", + *s, *r); } else { cl_long *s = (cl_long *)destItems; cl_long *r = (cl_long *)refValues; - log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value " - " (should have been %lld, returned %lld)!\n", *s, *r ); + log_error("ERROR: atomic function operated correctly but did NOT " + "return correct 'old' value " + " (should 
have been %lld, returned %lld)!\n", + *s, *r); } return -1; } - delete [] destItems; - free( refValues ); - if( startRefValues != NULL ) - free( startRefValues ); + delete[] destItems; + free(refValues); + if (startRefValues != NULL) free(startRefValues); return 0; } -int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore, - TestFns testFns, - bool extended, bool matchGroupSize, bool usingAtomicPrefix ) +int test_atomic_function_set(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements, + const char *programCore, TestFns testFns, + bool extended, bool matchGroupSize, + bool usingAtomicPrefix) { - log_info(" Testing %s functions...\n", usingAtomicPrefix ? "atomic_" : "atom_"); + log_info(" Testing %s functions...\n", + usingAtomicPrefix ? "atomic_" : "atom_"); int errors = 0; - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kInt, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kUInt, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kInt, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kUInt, matchGroupSize ); - - // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 bit functions still use the "atom" prefix. - // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic" prefix. 
- if (!usingAtomicPrefix) { - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kLong, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kULong, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kLong, matchGroupSize ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kULong, matchGroupSize ); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, kInt, + matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, kUInt, + matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, kInt, + matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, kUInt, + matchGroupSize); + + // Only the 32 bit atomic functions use the "atomic" prefix in 1.1, the 64 + // bit functions still use the "atom" prefix. The argument usingAtomicPrefix + // is set to true if programCore was generated with the "atomic" prefix. 
+ if (!usingAtomicPrefix) + { + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, + kLong, matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, false, + kULong, matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, + kLong, matchGroupSize); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + programCore, testFns, extended, true, + kULong, matchGroupSize); } return errors; @@ -454,265 +537,345 @@ int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_comma #pragma mark ---- add const char atom_add_core[] = -" oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n" -" atom_add( &destMemory[0], tid + 3 );\n" -" atom_add( &destMemory[0], tid + 3 );\n" -" atom_add( &destMemory[0], tid + 3 );\n"; + " oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n" + " atom_add( &destMemory[0], tid + 3 );\n" + " atom_add( &destMemory[0], tid + 3 );\n" + " atom_add( &destMemory[0], tid + 3 );\n"; const char atomic_add_core[] = -" oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n" -" atomic_add( &destMemory[0], tid + 3 );\n" -" atomic_add( &destMemory[0], tid + 3 );\n" -" atomic_add( &destMemory[0], tid + 3 );\n"; + " oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n" + " atomic_add( &destMemory[0], tid + 3 );\n" + " atomic_add( &destMemory[0], tid + 3 );\n" + " atomic_add( &destMemory[0], tid + 3 );\n"; -cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_add_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = 0; - for( size_t i = 0; i < size; i++ ) - total += ( (cl_int)i + 3 ) * 4; + for (size_t i = 0; i < size; i++) total += ((cl_int)i + 3) * 4; return total; } -cl_long test_atomic_add_result_long( size_t size, 
cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_add_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = 0; - for( size_t i = 0; i < size; i++ ) - total += ( ( i + 3 ) * 4 ); + for (size_t i = 0; i < size; i++) total += ((i + 3) * 4); return total; } -int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_add(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0, + 0LL, + NULL, + test_atomic_add_result_int, + NULL, + NULL, + test_atomic_add_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_add_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) + return -1; + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_add_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) - return -1; return 0; } #pragma mark ---- sub -const char atom_sub_core[] = " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n"; +const char atom_sub_core[] = + " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n"; -const char atomic_sub_core[] = " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n"; +const char atomic_sub_core[] = + " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n"; -cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int 
test_atomic_sub_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = INT_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total -= (cl_int)i + 3; + for (size_t i = 0; i < size; i++) total -= (cl_int)i + 3; return total; } -cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_sub_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = LONG_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total -= i + 3; + for (size_t i = 0; i < size; i++) total -= i + 3; return total; } -int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_sub(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_sub_result_int, + NULL, + NULL, + test_atomic_sub_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_sub_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_sub_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } #pragma mark ---- xchg -const char atom_xchg_core[] = " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n"; +const char atom_xchg_core[] = + " oldValues[tid] = 
atom_xchg( &destMemory[0], tid );\n"; -const char atomic_xchg_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; -const char atomic_xchg_float_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; +const char atomic_xchg_core[] = + " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; +const char atomic_xchg_float_core[] = + " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n"; -bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue ) +bool test_atomic_xchg_verify_int(size_t size, cl_int *refValues, + cl_int finalValue) { - /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref + * array, and ONLY one entry */ char *valids; size_t i; char originalValidCount = 0; - valids = (char *)malloc( sizeof( char ) * size ); - memset( valids, 0, sizeof( char ) * size ); + valids = (char *)malloc(sizeof(char) * size); + memset(valids, 0, sizeof(char) * size); - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( refValues[ i ] == INT_TEST_VALUE ) + if (refValues[i] == INT_TEST_VALUE) { // Special initial value originalValidCount++; continue; } - if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + if (refValues[i] < 0 || (size_t)refValues[i] >= size) { - log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] ); + log_error( + "ERROR: Reference value %ld outside of valid range! (%d)\n", i, + refValues[i]); return false; } - valids[ refValues[ i ] ] ++; + valids[refValues[i]]++; } - /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be - the final value outputted */ - if( valids[ finalValue ] > 0 ) + /* Note: ONE entry will have zero count. 
It'll be the last one that + executed, because that value should be the final value outputted */ + if (valids[finalValue] > 0) { - log_error( "ERROR: Final value %d was also in ref list!\n", finalValue ); + log_error("ERROR: Final value %d was also in ref list!\n", finalValue); return false; } else - valids[ finalValue ] = 1; // So the following loop will be okay + valids[finalValue] = 1; // So the following loop will be okay /* Now check that every entry has one and only one count */ - if( originalValidCount != 1 ) + if (originalValidCount != 1) { - log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + log_error("ERROR: Starting reference value %d did not occur " + "once-and-only-once (occurred %d)\n", + 65191, originalValidCount); return false; } - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( valids[ i ] != 1 ) + if (valids[i] != 1) { - log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); - for( size_t j = 0; j < size; j++ ) - log_info( "%d: %d\n", (int)j, (int)valids[ j ] ); + log_error("ERROR: Reference value %ld did not occur " + "once-and-only-once (occurred %d)\n", + i, valids[i]); + for (size_t j = 0; j < size; j++) + log_info("%d: %d\n", (int)j, (int)valids[j]); return false; } } - free( valids ); + free(valids); return true; } -bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue ) +bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues, + cl_long finalValue) { - /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref + * array, and ONLY one entry */ char *valids; size_t i; char originalValidCount = 0; - valids = (char *)malloc( sizeof( char ) * size ); - memset( valids, 0, sizeof( char ) * size ); + valids = (char *)malloc(sizeof(char) * 
size); + memset(valids, 0, sizeof(char) * size); - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( refValues[ i ] == LONG_TEST_VALUE ) + if (refValues[i] == LONG_TEST_VALUE) { // Special initial value originalValidCount++; continue; } - if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + if (refValues[i] < 0 || (size_t)refValues[i] >= size) { - log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] ); + log_error( + "ERROR: Reference value %ld outside of valid range! (%lld)\n", + i, refValues[i]); return false; } - valids[ refValues[ i ] ] ++; + valids[refValues[i]]++; } - /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be - the final value outputted */ - if( valids[ finalValue ] > 0 ) + /* Note: ONE entry will have zero count. It'll be the last one that + executed, because that value should be the final value outputted */ + if (valids[finalValue] > 0) { - log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue ); + log_error("ERROR: Final value %lld was also in ref list!\n", + finalValue); return false; } else - valids[ finalValue ] = 1; // So the following loop will be okay + valids[finalValue] = 1; // So the following loop will be okay /* Now check that every entry has one and only one count */ - if( originalValidCount != 1 ) + if (originalValidCount != 1) { - log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + log_error("ERROR: Starting reference value %d did not occur " + "once-and-only-once (occurred %d)\n", + 65191, originalValidCount); return false; } - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( valids[ i ] != 1 ) + if (valids[i] != 1) { - log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); - for( size_t j = 0; j < size; j++ ) - log_info( "%d: %d\n", (int)j, (int)valids[ 
j ] ); + log_error("ERROR: Reference value %ld did not occur " + "once-and-only-once (occurred %d)\n", + i, valids[i]); + for (size_t j = 0; j < size; j++) + log_info("%d: %d\n", (int)j, (int)valids[j]); return false; } } - free( valids ); + free(valids); return true; } -bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue ) +bool test_atomic_xchg_verify_float(size_t size, cl_float *refValues, + cl_float finalValue) { - /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */ + /* For xchg, each value from 0 to size - 1 should have an entry in the ref + * array, and ONLY one entry */ char *valids; size_t i; char originalValidCount = 0; - valids = (char *)malloc( sizeof( char ) * size ); - memset( valids, 0, sizeof( char ) * size ); + valids = (char *)malloc(sizeof(char) * size); + memset(valids, 0, sizeof(char) * size); - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - cl_int *intRefValue = (cl_int *)( &refValues[ i ] ); - if( *intRefValue == INT_TEST_VALUE ) + cl_int *intRefValue = (cl_int *)(&refValues[i]); + if (*intRefValue == INT_TEST_VALUE) { // Special initial value originalValidCount++; continue; } - if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size ) + if (refValues[i] < 0 || (size_t)refValues[i] >= size) { - log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] ); + log_error( + "ERROR: Reference value %ld outside of valid range! (%a)\n", i, + refValues[i]); return false; } - valids[ (int)refValues[ i ] ] ++; + valids[(int)refValues[i]]++; } - /* Note: ONE entry will have zero count. It'll be the last one that executed, because that value should be - the final value outputted */ - if( valids[ (int)finalValue ] > 0 ) + /* Note: ONE entry will have zero count. 
It'll be the last one that + executed, because that value should be the final value outputted */ + if (valids[(int)finalValue] > 0) { - log_error( "ERROR: Final value %a was also in ref list!\n", finalValue ); + log_error("ERROR: Final value %a was also in ref list!\n", finalValue); return false; } else - valids[ (int)finalValue ] = 1; // So the following loop will be okay + valids[(int)finalValue] = 1; // So the following loop will be okay /* Now check that every entry has one and only one count */ - if( originalValidCount != 1 ) + if (originalValidCount != 1) { - log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", 65191, originalValidCount ); + log_error("ERROR: Starting reference value %d did not occur " + "once-and-only-once (occurred %d)\n", + 65191, originalValidCount); return false; } - for( i = 0; i < size; i++ ) + for (i = 0; i < size; i++) { - if( valids[ i ] != 1 ) + if (valids[i] != 1) { - log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] ); - for( size_t j = 0; j < size; j++ ) - log_info( "%d: %d\n", (int)j, (int)valids[ j ] ); + log_error("ERROR: Reference value %ld did not occur " + "once-and-only-once (occurred %d)\n", + i, valids[i]); + for (size_t j = 0; j < size; j++) + log_info("%d: %d\n", (int)j, (int)valids[j]); return false; } } - free( valids ); + free(valids); return true; } -int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_xchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float }; - - int errors = test_atomic_function_set( deviceID, context, queue, num_elements, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false ); - errors |= 
test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true ); - - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, false, kFloat, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, true, kFloat, true ); + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + NULL, + NULL, + test_atomic_xchg_verify_int, + NULL, + NULL, + test_atomic_xchg_verify_long, + NULL, + NULL, + test_atomic_xchg_verify_float }; + + int errors = test_atomic_function_set( + deviceID, context, queue, num_elements, atom_xchg_core, set, false, + true, /*usingAtomicPrefix*/ false); + errors |= test_atomic_function_set(deviceID, context, queue, num_elements, + atomic_xchg_core, set, false, true, + /*usingAtomicPrefix*/ true); + + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_xchg_float_core, set, false, false, + kFloat, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_xchg_float_core, set, false, true, + kFloat, true); return errors; } @@ -720,51 +883,71 @@ int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- min -const char atom_min_core[] = " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n"; +const char atom_min_core[] = + " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n"; -const char atomic_min_core[] = " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n"; +const char atomic_min_core[] = + " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n"; -cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_min_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = 0x7fffffffL; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i 
< size; i++) { - if( startRefValues[ i ] < total ) - total = startRefValues[ i ]; + if (startRefValues[i] < total) total = startRefValues[i]; } return total; } -void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d ) +void test_atomic_min_gen_int(size_t size, cl_int *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff; + for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff; } -cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_min_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = 0x7fffffffffffffffLL; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i < size; i++) { - if( startRefValues[ i ] < total ) - total = startRefValues[ i ]; + if (startRefValues[i] < total) total = startRefValues[i]; } return total; } -void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d ) +void test_atomic_min_gen_long(size_t size, cl_long *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) ); + for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_long)(genrand_int32(d) + | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16)); } -int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_min(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_min_core, set, true, /*matchGroupSize*/ false, 
/*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0x7fffffffL, + 0x7fffffffffffffffLL, + NULL, + test_atomic_min_result_int, + test_atomic_min_gen_int, + NULL, + test_atomic_min_result_long, + test_atomic_min_gen_long, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_min_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_min_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -772,79 +955,118 @@ int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- max -const char atom_max_core[] = " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n"; +const char atom_max_core[] = + " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n"; -const char atomic_max_core[] = " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n"; +const char atomic_max_core[] = + " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n"; -cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_max_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = 0; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i < size; i++) { - if( startRefValues[ i ] > total ) - total = startRefValues[ i ]; + if (startRefValues[i] > total) total = startRefValues[i]; } return total; } -void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d ) +void test_atomic_max_gen_int(size_t size, cl_int *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff; + 
for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_int)(genrand_int32(d) % 0x3fffffff) + 0x3fffffff; } -cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_max_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = 0; - for( size_t i = 0; i < size; i++ ) + for (size_t i = 0; i < size; i++) { - if( startRefValues[ i ] > total ) - total = startRefValues[ i ]; + if (startRefValues[i] > total) total = startRefValues[i]; } return total; } -void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d ) +void test_atomic_max_gen_long(size_t size, cl_long *startRefValues, MTdata d) { - for( size_t i = 0; i < size; i++ ) - startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) ); + for (size_t i = 0; i < size; i++) + startRefValues[i] = + (cl_long)(genrand_int32(d) + | (((cl_long)genrand_int32(d) & 0x7fffffffL) << 16)); } -int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_max(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0, + 0, + NULL, + test_atomic_max_result_int, + test_atomic_max_gen_int, + NULL, + test_atomic_max_result_long, + test_atomic_max_gen_long, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_max_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) + return -1; + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_max_core, set, true, + 
/*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) - return -1; return 0; } #pragma mark ---- inc -const char atom_inc_core[] = " oldValues[tid] = atom_inc( &destMemory[0] );\n"; +const char atom_inc_core[] = + " oldValues[tid] = atom_inc( &destMemory[0] );\n"; -const char atomic_inc_core[] = " oldValues[tid] = atomic_inc( &destMemory[0] );\n"; +const char atomic_inc_core[] = + " oldValues[tid] = atomic_inc( &destMemory[0] );\n"; -cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_inc_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { return INT_TEST_VALUE + (cl_int)size; } -cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_inc_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { return LONG_TEST_VALUE + size; } -int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_inc(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_inc_result_int, + NULL, + NULL, + test_atomic_inc_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_inc_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, 
queue, num_elements, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_inc_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -852,27 +1074,46 @@ int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- dec -const char atom_dec_core[] = " oldValues[tid] = atom_dec( &destMemory[0] );\n"; +const char atom_dec_core[] = + " oldValues[tid] = atom_dec( &destMemory[0] );\n"; -const char atomic_dec_core[] = " oldValues[tid] = atomic_dec( &destMemory[0] );\n"; +const char atomic_dec_core[] = + " oldValues[tid] = atomic_dec( &destMemory[0] );\n"; -cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) +cl_int test_atomic_dec_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { return INT_TEST_VALUE - (cl_int)size; } -cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_dec_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { return LONG_TEST_VALUE - size; } -int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_dec(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_dec_result_int, + NULL, + NULL, + test_atomic_dec_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, 
atom_dec_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_dec_core, set, false, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -881,129 +1122,159 @@ int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- cmpxchg /* We test cmpxchg by implementing (the long way) atom_add */ +// clang-format off const char atom_cmpxchg_core[] = -" int oldValue, origValue, newValue;\n" -" do { \n" -" origValue = destMemory[0];\n" -" newValue = origValue + tid + 2;\n" -" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" -" } while( oldValue != origValue );\n" -" oldValues[tid] = oldValue;\n" -; + " int oldValue, origValue, newValue;\n" + " do { \n" + " origValue = destMemory[0];\n" + " newValue = origValue + tid + 2;\n" + " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" + " } while( oldValue != origValue );\n" + " oldValues[tid] = oldValue;\n"; const char atom_cmpxchg64_core[] = -" long oldValue, origValue, newValue;\n" -" do { \n" -" origValue = destMemory[0];\n" -" newValue = origValue + tid + 2;\n" -" oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" -" } while( oldValue != origValue );\n" -" oldValues[tid] = oldValue;\n" -; + " long oldValue, origValue, newValue;\n" + " do { \n" + " origValue = destMemory[0];\n" + " newValue = origValue + tid + 2;\n" + " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n" + " } while( oldValue != origValue );\n" + " oldValues[tid] = oldValue;\n"; const char atomic_cmpxchg_core[] = -" int oldValue, origValue, newValue;\n" -" do { \n" -" origValue = destMemory[0];\n" -" newValue = origValue + tid + 2;\n" -" 
oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n" -" } while( oldValue != origValue );\n" -" oldValues[tid] = oldValue;\n" -; - -cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue ) + " int oldValue, origValue, newValue;\n" + " do { \n" + " origValue = destMemory[0];\n" + " newValue = origValue + tid + 2;\n" + " oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n" + " } while( oldValue != origValue );\n" + " oldValues[tid] = oldValue;\n"; +// clang-format on + +cl_int test_atomic_cmpxchg_result_int(size_t size, cl_int *startRefValues, + size_t whichDestValue) { cl_int total = INT_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total += (cl_int)i + 2; + for (size_t i = 0; i < size; i++) total += (cl_int)i + 2; return total; } -cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue ) +cl_long test_atomic_cmpxchg_result_long(size_t size, cl_long *startRefValues, + size_t whichDestValue) { cl_long total = LONG_TEST_VALUE; - for( size_t i = 0; i < size; i++ ) - total += i + 2; + for (size_t i = 0; i < size; i++) total += i + 2; return total; } -int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL }; + TestFns set = { INT_TEST_VALUE, + LONG_TEST_VALUE, + NULL, + test_atomic_cmpxchg_result_int, + NULL, + NULL, + test_atomic_cmpxchg_result_long, + NULL, + NULL }; int errors = 0; log_info(" Testing atom_ functions...\n"); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, 
set, false, false, kUInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kUInt, true ); - - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kLong, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kULong, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kLong, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kULong, true ); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, false, kInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, false, kUInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, true, kInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg_core, set, false, true, kUInt, true); + + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, false, + kLong, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, false, + kULong, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, true, kLong, + true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atom_cmpxchg64_core, set, false, true, + kULong, true); log_info(" Testing atomic_ functions...\n"); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, 
kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kUInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kInt, true ); - errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kUInt, true ); - - if( errors ) - return -1; + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, false, kInt, + true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, false, + kUInt, true); + errors |= + test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, true, kInt, true); + errors |= test_atomic_function(deviceID, context, queue, num_elements, + atomic_cmpxchg_core, set, false, true, kUInt, + true); + + if (errors) return -1; return 0; } #pragma mark -------- Bitwise functions -size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType ) +size_t test_bitwise_num_results(size_t threadCount, ExplicitType dataType) { - size_t numBits = get_explicit_type_size( dataType ) * 8; + size_t numBits = get_explicit_type_size(dataType) * 8; - return ( threadCount + numBits - 1 ) / numBits; + return (threadCount + numBits - 1) / numBits; } #pragma mark ---- and +// clang-format off const char atom_and_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n" -; + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"; const char atomic_and_core[] = -" size_t numBits = 
sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n" -; + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"; +// clang-format on -cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult ) +cl_int test_atomic_and_result_int(size_t size, cl_int *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 31 ) / 32; - if( whichResult < numThreads - 1 ) - return 0; + size_t numThreads = ((size_t)size + 31) / 32; + if (whichResult < numThreads - 1) return 0; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 32; cl_int bits = (cl_int)0xffffffffL; - for( size_t i = 0; i < numBits; i++ ) - bits &= ~( 1 << i ); + for (size_t i = 0; i < numBits; i++) bits &= ~(1 << i); return bits; } -cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +cl_long test_atomic_and_result_long(size_t size, cl_long *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 63 ) / 64; - if( whichResult < numThreads - 1 ) - return 0; + size_t numThreads = ((size_t)size + 63) / 64; + if (whichResult < numThreads - 1) return 0; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 64; @@ -1013,14 +1284,28 @@ cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_ return bits; } -int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_and(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - 
TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results, - test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0xffffffff, + 0xffffffffffffffffLL, + test_bitwise_num_results, + test_atomic_and_result_int, + NULL, + NULL, + test_atomic_and_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_and_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_and_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -1028,59 +1313,68 @@ int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue #pragma mark ---- or +// clang-format off const char atom_or_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n" -; + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"; const char atomic_or_core[] = -" size_t numBits = sizeof( destMemory[0] ) * 8;\n" -" int whichResult = tid / numBits;\n" -" int bitIndex = tid - ( whichResult * numBits );\n" -"\n" -" oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n" -; - -cl_int test_atomic_or_result_int( size_t size, cl_int 
*startRefValues, size_t whichResult ) + " size_t numBits = sizeof( destMemory[0] ) * 8;\n" + " int whichResult = tid / numBits;\n" + " int bitIndex = tid - ( whichResult * numBits );\n" + "\n" + " oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"; +// clang-format on + +cl_int test_atomic_or_result_int(size_t size, cl_int *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 31 ) / 32; - if( whichResult < numThreads - 1 ) - return 0xffffffff; + size_t numThreads = ((size_t)size + 31) / 32; + if (whichResult < numThreads - 1) return 0xffffffff; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 32; cl_int bits = 0; - for( size_t i = 0; i < numBits; i++ ) - bits |= ( 1 << i ); + for (size_t i = 0; i < numBits; i++) bits |= (1 << i); return bits; } -cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +cl_long test_atomic_or_result_long(size_t size, cl_long *startRefValues, + size_t whichResult) { - size_t numThreads = ( (size_t)size + 63 ) / 64; - if( whichResult < numThreads - 1 ) - return 0x0ffffffffffffffffLL; + size_t numThreads = ((size_t)size + 63) / 64; + if (whichResult < numThreads - 1) return 0x0ffffffffffffffffLL; // Last item doesn't get and'ed on every bit, so we have to mask away size_t numBits = (size_t)size - whichResult * 64; cl_long bits = 0; - for( size_t i = 0; i < numBits; i++ ) - bits |= ( 1LL << i ); + for (size_t i = 0; i < numBits; i++) bits |= (1LL << i); return bits; } -int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_or(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL }; + TestFns set = { + 0, 0LL, test_bitwise_num_results, 
test_atomic_or_result_int, + NULL, NULL, test_atomic_or_result_long, NULL, + NULL + }; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_or_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_or_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } @@ -1100,33 +1394,44 @@ const char atomic_xor_core[] = "\n" " oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n"; -cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult ) +cl_int test_atomic_xor_result_int(size_t size, cl_int *startRefValues, + size_t whichResult) { cl_int total = 0x2f08ab41; - for( size_t i = 0; i < size; i++ ) - total ^= ( 1 << ( i & 31 ) ); + for (size_t i = 0; i < size; i++) total ^= (1 << (i & 31)); return total; } -cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult ) +cl_long test_atomic_xor_result_long(size_t size, cl_long *startRefValues, + size_t whichResult) { cl_long total = 0x2f08ab418ba0541LL; - for( size_t i = 0; i < size; i++ ) - total ^= ( 1LL << ( i & 63 ) ); + for (size_t i = 0; i < size; i++) total ^= (1LL << (i & 63)); return total; } -int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +int test_atomic_xor(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { - TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, 
NULL }; - - if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 ) + TestFns set = { 0x2f08ab41, + 0x2f08ab418ba0541LL, + NULL, + test_atomic_xor_result_int, + NULL, + NULL, + test_atomic_xor_result_long, + NULL, + NULL }; + + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atom_xor_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false) + != 0) return -1; - if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 ) + if (test_atomic_function_set( + deviceID, context, queue, num_elements, atomic_xor_core, set, true, + /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true) + != 0) return -1; return 0; } - - - - diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp index b85e3d24..d625d8b4 100644 --- a/test_conformance/atomics/test_indexed_cases.cpp +++ b/test_conformance/atomics/test_indexed_cases.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at @@ -16,22 +16,25 @@ #include "testBase.h" #include "harness/conversions.h" -const char * atomic_index_source = -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"// Counter keeps track of which index in counts we are using.\n" -"// We get that value, increment it, and then set that index in counts to our thread ID.\n" -"// At the end of this we should have all thread IDs in some random location in counts\n" -"// exactly once. 
If atom_add failed then we will write over various thread IDs and we\n" -"// will be missing some.\n" -"\n" -"__kernel void add_index_test(__global int *counter, __global int *counts) {\n" -" int tid = get_global_id(0);\n" -" \n" -" int counter_to_use = atom_add(counter, 1);\n" -" counts[counter_to_use] = tid;\n" -"}"; - -int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) +// clang-format off +const char *atomic_index_source = + "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" + "// Counter keeps track of which index in counts we are using.\n" + "// We get that value, increment it, and then set that index in counts to our thread ID.\n" + "// At the end of this we should have all thread IDs in some random location in counts\n" + "// exactly once. If atom_add failed then we will write over various thread IDs and we\n" + "// will be missing some.\n" + "\n" + "__kernel void add_index_test(__global int *counter, __global int *counts) {\n" + " int tid = get_global_id(0);\n" + " \n" + " int counter_to_use = atom_add(counter, 1);\n" + " counts[counter_to_use] = tid;\n" + "}"; +// clang-format on + +int test_atomic_add_index(cl_device_id deviceID, cl_context context, + cl_command_queue queue, int num_elements) { clProgramWrapper program; clKernelWrapper kernel; @@ -39,25 +42,29 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_ size_t numGlobalThreads, numLocalThreads; int fail = 0, succeed = 0, err; - /* Check if atomics are supported. */ - if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) { - log_info("Base atomics not supported (cl_khr_global_int32_base_atomics). Skipping test.\n"); - return 0; - } + /* Check if atomics are supported. */ + if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) + { + log_info("Base atomics not supported " + "(cl_khr_global_int32_base_atomics). 
Skipping test.\n"); + return 0; + } //===== add_index test // The index test replicates what particles does. - // It uses one memory location to keep track of the current index and then each thread - // does an atomic add to it to get its new location. The threads then write to their - // assigned location. At the end we check to make sure that each thread's ID shows up - // exactly once in the output. + // It uses one memory location to keep track of the current index and then + // each thread does an atomic add to it to get its new location. The threads + // then write to their assigned location. At the end we check to make sure + // that each thread's ID shows up exactly once in the output. numGlobalThreads = 2048; - if( create_single_kernel_helper( context, &program, &kernel, 1, &atomic_index_source, "add_index_test" ) ) + if (create_single_kernel_helper(context, &program, &kernel, 1, + &atomic_index_source, "add_index_test")) return -1; - if( get_max_common_work_group_size( context, kernel, numGlobalThreads, &numLocalThreads ) ) + if (get_max_common_work_group_size(context, kernel, numGlobalThreads, + &numLocalThreads)) return -1; log_info("Execute global_threads:%d local_threads:%d\n", @@ -72,86 +79,133 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, cl_command_ sizeof(cl_int) * numGlobalThreads, NULL, NULL); // Reset all those locations to -1 to indciate they have not been used. 
- cl_int *values = (cl_int*) malloc(sizeof(cl_int)*numGlobalThreads); - if (values == NULL) { - log_error("add_index_test FAILED to allocate memory for initial values.\n"); - fail = 1; succeed = -1; - } else { + cl_int *values = (cl_int *)malloc(sizeof(cl_int) * numGlobalThreads); + if (values == NULL) + { + log_error( + "add_index_test FAILED to allocate memory for initial values.\n"); + fail = 1; + succeed = -1; + } + else + { memset(values, -1, numLocalThreads); - unsigned int i=0; - for (i=0; i= max_counts_per_bin) { - bin = random_in_range(0, number_of_bins-1, d); + for (i = 0; i < number_of_items; i++) + { + int bin = random_in_range(0, number_of_bins - 1, d); + while (l_bin_counts[bin] >= max_counts_per_bin) + { + bin = random_in_range(0, number_of_bins - 1, d); } if (bin >= number_of_bins) - log_error("add_index_bin_test internal error generating bin assignments: bin %d >= number_of_bins %d.\n", bin, number_of_bins); - if (l_bin_counts[bin]+1 > max_counts_per_bin) - log_error("add_index_bin_test internal error generating bin assignments: bin %d has more entries (%d) than max_counts_per_bin (%d).\n", bin, l_bin_counts[bin], max_counts_per_bin); + log_error("add_index_bin_test internal error generating bin " + "assignments: bin %d >= number_of_bins %d.\n", + bin, number_of_bins); + if (l_bin_counts[bin] + 1 > max_counts_per_bin) + log_error( + "add_index_bin_test internal error generating bin assignments: " + "bin %d has more entries (%d) than max_counts_per_bin (%d).\n", + bin, l_bin_counts[bin], max_counts_per_bin); l_bin_counts[bin]++; l_bin_assignments[i] = bin; - // log_info("item %d assigned to bin %d (%d items)\n", i, bin, l_bin_counts[bin]); + // log_info("item %d assigned to bin %d (%d items)\n", i, bin, + // l_bin_counts[bin]); } - err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, sizeof(cl_int)*number_of_items, l_bin_assignments, 0, NULL, NULL); - if (err) { - log_error("add_index_bin_test FAILED to set initial values for bin_assignments: 
%d\n", err); + err = clEnqueueWriteBuffer(queue, bin_assignments, true, 0, + sizeof(cl_int) * number_of_items, + l_bin_assignments, 0, NULL, NULL); + if (err) + { + log_error("add_index_bin_test FAILED to set initial values for " + "bin_assignments: %d\n", + err); return -1; } // Setup the kernel err = clSetKernelArg(kernel, 0, sizeof(bin_counters), &bin_counters); err |= clSetKernelArg(kernel, 1, sizeof(bins), &bins); err |= clSetKernelArg(kernel, 2, sizeof(bin_assignments), &bin_assignments); - err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), &max_counts_per_bin); - if (err) { - log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err); - fail=1; succeed=-1; + err |= clSetKernelArg(kernel, 3, sizeof(max_counts_per_bin), + &max_counts_per_bin); + if (err) + { + log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", + err); + fail = 1; + succeed = -1; return -1; } - err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, global_threads, local_threads, 0, NULL, NULL ); - if (err) { + err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_threads, + local_threads, 0, NULL, NULL); + if (err) + { log_error("add_index_bin_test FAILED to execute kernel: %d\n", err); - fail=1; succeed=-1; + fail = 1; + succeed = -1; } - cl_int *final_bin_assignments = (cl_int*)malloc(sizeof(cl_int)*number_of_bins*max_counts_per_bin); - if (!final_bin_assignments) { - log_error("add_index_bin_test FAILED to allocate initial values for final_bin_assignments.\n"); + cl_int *final_bin_assignments = + (cl_int *)malloc(sizeof(cl_int) * number_of_bins * max_counts_per_bin); + if (!final_bin_assignments) + { + log_error("add_index_bin_test FAILED to allocate initial values for " + "final_bin_assignments.\n"); return -1; } - err = clEnqueueReadBuffer( queue, bins, true, 0, sizeof(cl_int)*number_of_bins*max_counts_per_bin, final_bin_assignments, 0, NULL, NULL ); - if (err) { + err = clEnqueueReadBuffer(queue, bins, true, 0, + sizeof(cl_int) * 
number_of_bins + * max_counts_per_bin, + final_bin_assignments, 0, NULL, NULL); + if (err) + { log_error("add_index_bin_test FAILED to read back bins: %d\n", err); - fail = 1; succeed=-1; + fail = 1; + succeed = -1; } - cl_int *final_bin_counts = (cl_int*)malloc(sizeof(cl_int)*number_of_bins); - if (!final_bin_counts) { - log_error("add_index_bin_test FAILED to allocate initial values for final_bin_counts.\n"); + cl_int *final_bin_counts = + (cl_int *)malloc(sizeof(cl_int) * number_of_bins); + if (!final_bin_counts) + { + log_error("add_index_bin_test FAILED to allocate initial values for " + "final_bin_counts.\n"); return -1; } - err = clEnqueueReadBuffer( queue, bin_counters, true, 0, sizeof(cl_int)*number_of_bins, final_bin_counts, 0, NULL, NULL ); - if (err) { - log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err); - fail = 1; succeed=-1; + err = clEnqueueReadBuffer(queue, bin_counters, true, 0, + sizeof(cl_int) * number_of_bins, final_bin_counts, + 0, NULL, NULL); + if (err) + { + log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", + err); + fail = 1; + succeed = -1; } // Verification. - int errors=0; + int errors = 0; int current_bin; int search; // Print out all the contents of the bins. // for (current_bin=0; current_bin Date: Fri, 30 Sep 2022 11:41:19 +0100 Subject: [NFC] atomics: Remove set-but-unused "succeed" variables (#1517) The "succeed" variables are never read and they don't seem to serve any purpose that's not already provided by the "fail" variables. In `add_index_bin_test` the "fail" variable is also set but unused, but that may require an actual fix, so leaving that out of this commit. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/atomics/test_indexed_cases.cpp | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/test_conformance/atomics/test_indexed_cases.cpp b/test_conformance/atomics/test_indexed_cases.cpp index d625d8b4..2bba3e24 100644 --- a/test_conformance/atomics/test_indexed_cases.cpp +++ b/test_conformance/atomics/test_indexed_cases.cpp @@ -40,7 +40,7 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, clKernelWrapper kernel; clMemWrapper counter, counters; size_t numGlobalThreads, numLocalThreads; - int fail = 0, succeed = 0, err; + int fail = 0, err; /* Check if atomics are supported. */ if (!is_extension_available(deviceID, "cl_khr_global_int32_base_atomics")) @@ -85,7 +85,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, log_error( "add_index_test FAILED to allocate memory for initial values.\n"); fail = 1; - succeed = -1; } else { @@ -104,7 +103,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, "add_index_test FAILED to write initial values to arrays: %d\n", err); fail = 1; - succeed = -1; } else { @@ -115,7 +113,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, log_error("add_index_test FAILED to set kernel arguments: %d\n", err); fail = 1; - succeed = -1; } else { @@ -127,7 +124,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, log_error("add_index_test FAILED to execute kernel: %d\n", err); fail = 1; - succeed = -1; } else { @@ -140,7 +136,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, "add_index_test FAILED to read back results: %d\n", err); fail = 1; - succeed = -1; } else { @@ -161,7 +156,6 @@ int test_atomic_add_index(cl_device_id deviceID, cl_context context, "instances (%d!=1) for counter %d.\n", instances_found, looking_for); fail = 1; - succeed = -1; } } } @@ -208,7 +202,6 @@ int add_index_bin_test(size_t 
*global_threads, cl_command_queue queue, int max_counts_per_bin = divisor * 2; int fail = 0; - int succeed = 0; int err; clProgramWrapper program; @@ -353,7 +346,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, log_error("add_index_bin_test FAILED to set kernel arguments: %d\n", err); fail = 1; - succeed = -1; return -1; } @@ -363,7 +355,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, { log_error("add_index_bin_test FAILED to execute kernel: %d\n", err); fail = 1; - succeed = -1; } cl_int *final_bin_assignments = @@ -382,7 +373,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, { log_error("add_index_bin_test FAILED to read back bins: %d\n", err); fail = 1; - succeed = -1; } cl_int *final_bin_counts = @@ -401,7 +391,6 @@ int add_index_bin_test(size_t *global_threads, cl_command_queue queue, log_error("add_index_bin_test FAILED to read back bin_counters: %d\n", err); fail = 1; - succeed = -1; } // Verification. -- cgit v1.2.3 From 73f51ccff747607b091c556e1b4b0134423e811e Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Sat, 1 Oct 2022 10:14:32 +0100 Subject: math_brute_force: Fix -Wformat warnings (#1518) * math_brute_force: Fix -Wformat warnings The main sources of warnings were: * Printing of 64-bit types, which is now done using the `PRI*64` macros from to ensure portability across 32 and 64-bit builds. * Printing of `size_t` types that lacked a `z` length modifier. * Printing of values with a `z` length modifier that weren't a `size_t` type. 
Signed-off-by: Sven van Haastregt * [NFC] math_brute_force: clang-format after -Wformat changes Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/math_brute_force/CMakeLists.txt | 2 +- .../math_brute_force/binary_double.cpp | 2 +- test_conformance/math_brute_force/binary_float.cpp | 4 +-- .../math_brute_force/binary_i_float.cpp | 4 +-- .../math_brute_force/binary_operator_double.cpp | 2 +- .../math_brute_force/binary_operator_float.cpp | 4 +-- .../binary_two_results_i_double.cpp | 35 ++++++++++++---------- .../binary_two_results_i_float.cpp | 31 ++++++++++--------- .../math_brute_force/i_unary_double.cpp | 6 ++-- .../math_brute_force/i_unary_float.cpp | 6 ++-- .../math_brute_force/macro_binary_double.cpp | 11 ++++--- .../math_brute_force/macro_binary_float.cpp | 4 +-- .../math_brute_force/macro_unary_double.cpp | 7 +++-- test_conformance/math_brute_force/main.cpp | 2 +- .../math_brute_force/ternary_double.cpp | 6 ++-- .../math_brute_force/ternary_float.cpp | 5 ++-- test_conformance/math_brute_force/unary_double.cpp | 3 +- .../math_brute_force/unary_two_results_double.cpp | 6 ++-- .../math_brute_force/unary_two_results_float.cpp | 6 ++-- .../unary_two_results_i_double.cpp | 10 ++++--- .../math_brute_force/unary_two_results_i_float.cpp | 10 ++++--- .../math_brute_force/unary_u_double.cpp | 16 +++++----- .../math_brute_force/unary_u_float.cpp | 6 ++-- 23 files changed, 111 insertions(+), 77 deletions(-) diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 1db1ecdf..1c96f521 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -42,6 +42,6 @@ set(${MODULE_NAME}_SOURCES # math_brute_force compiles cleanly with -Wall (except for a few remaining # warnings), but other tests not (yet); so enable -Wall locally. 
-set_gnulike_module_compile_flags("-Wall -Wno-format -Wno-strict-aliasing -Wno-unknown-pragmas") +set_gnulike_module_compile_flags("-Wall -Wno-strict-aliasing -Wno-unknown-pragmas") include(../CMakeCommon.txt) diff --git a/test_conformance/math_brute_force/binary_double.cpp b/test_conformance/math_brute_force/binary_double.cpp index b6bb049b..f18d0b97 100644 --- a/test_conformance/math_brute_force/binary_double.cpp +++ b/test_conformance/math_brute_force/binary_double.cpp @@ -630,7 +630,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f " "ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index e85add4b..fe1491d7 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -755,7 +755,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { vlog_error( "\nERROR: %s%s: %f ulp error at {%a (0x%x), %a " - "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n", + "(0x%x)}: *%a vs. 
%a (0x%8.8x) at index: %zu\n", name, sizeNames[k], err, s[j], ((cl_uint *)s)[j], s2[j], ((cl_uint *)s2)[j], r[j], test, ((cl_uint *)&test)[0], j); @@ -787,7 +787,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f " "ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); diff --git a/test_conformance/math_brute_force/binary_i_float.cpp b/test_conformance/math_brute_force/binary_i_float.cpp index 2387ff06..d855f447 100644 --- a/test_conformance/math_brute_force/binary_i_float.cpp +++ b/test_conformance/math_brute_force/binary_i_float.cpp @@ -516,7 +516,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { vlog_error( "\nERROR: %s%s: %f ulp error at {%a (0x%8.8x), %d}: " - "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %d\n", + "*%a (0x%8.8x) vs. %a (0x%8.8x) at index: %zu\n", name, sizeNames[k], err, s[j], ((uint32_t *)s)[j], s2[j], r[j], ((uint32_t *)r)[j], test, ((cl_uint *)&test)[0], j); @@ -545,7 +545,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f " "ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); diff --git a/test_conformance/math_brute_force/binary_operator_double.cpp b/test_conformance/math_brute_force/binary_operator_double.cpp index 34ec6197..bbe5c438 100644 --- a/test_conformance/math_brute_force/binary_operator_double.cpp +++ b/test_conformance/math_brute_force/binary_operator_double.cpp @@ -601,7 +601,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + vlog("base:%14u step:%10u scale:%10u 
buf_elements:%10zu ulps:%5.3f " "ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); diff --git a/test_conformance/math_brute_force/binary_operator_float.cpp b/test_conformance/math_brute_force/binary_operator_float.cpp index 5577cffe..1a28d8d8 100644 --- a/test_conformance/math_brute_force/binary_operator_float.cpp +++ b/test_conformance/math_brute_force/binary_operator_float.cpp @@ -698,7 +698,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (fail) { vlog_error("\nERROR: %s%s: %f ulp error at {%a, %a}: *%a " - "vs. %a (0x%8.8x) at index: %d\n", + "vs. %a (0x%8.8x) at index: %zu\n", name, sizeNames[k], err, s[j], s2[j], r[j], test, ((cl_uint *)&test)[0], j); error = -1; @@ -726,7 +726,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f " "ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); diff --git a/test_conformance/math_brute_force/binary_two_results_i_double.cpp b/test_conformance/math_brute_force/binary_two_results_i_double.cpp index 59a5bfe2..bbfd707b 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #include @@ -527,17 +528,20 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) if (fail) { - vlog_error( - "\nERROR: %sD%s: {%f, %lld} ulp error at {%.13la, " - "%.13la} ({ 0x%16.16llx, 0x%16.16llx}): *{%.13la, " - "%d} ({ 0x%16.16llx, 0x%8.8x}) vs. 
{%.13la, %d} ({ " - "0x%16.16llx, 0x%8.8x})\n", - f->name, sizeNames[k], err, iErr, ((double *)gIn)[j], - ((double *)gIn2)[j], ((cl_ulong *)gIn)[j], - ((cl_ulong *)gIn2)[j], ((double *)gOut_Ref)[j], - ((int *)gOut_Ref2)[j], ((cl_ulong *)gOut_Ref)[j], - ((cl_uint *)gOut_Ref2)[j], test, q2[j], - ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]); + vlog_error("\nERROR: %sD%s: {%f, %" PRId64 + "} ulp error at {%.13la, " + "%.13la} ({ 0x%16.16" PRIx64 ", 0x%16.16" PRIx64 + "}): *{%.13la, " + "%d} ({ 0x%16.16" PRIx64 + ", 0x%8.8x}) vs. {%.13la, %d} ({ " + "0x%16.16" PRIx64 ", 0x%8.8x})\n", + f->name, sizeNames[k], err, iErr, + ((double *)gIn)[j], ((double *)gIn2)[j], + ((cl_ulong *)gIn)[j], ((cl_ulong *)gIn2)[j], + ((double *)gOut_Ref)[j], ((int *)gOut_Ref2)[j], + ((cl_ulong *)gOut_Ref)[j], + ((cl_uint *)gOut_Ref2)[j], test, q2[j], + ((cl_ulong *)q)[j], ((cl_uint *)q2)[j]); error = -1; goto exit; } @@ -548,8 +552,9 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { @@ -566,8 +571,8 @@ int TestFunc_DoubleI_Double_Double(const Func *f, MTdata d, bool relaxedMode) else vlog("passed"); - vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal, - maxErrorVal2); + vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2, + maxErrorVal, maxErrorVal2); } vlog("\n"); diff --git a/test_conformance/math_brute_force/binary_two_results_i_float.cpp b/test_conformance/math_brute_force/binary_two_results_i_float.cpp index 6c1dd3bc..07473376 100644 --- a/test_conformance/math_brute_force/binary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/binary_two_results_i_float.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #include @@ -513,16 +514,17 @@ int TestFunc_FloatI_Float_Float(const Func *f, 
MTdata d, bool relaxedMode) if (fail) { - vlog_error( - "\nERROR: %s%s: {%f, %lld} ulp error at {%a, %a} " - "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, " - "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n", - f->name, sizeNames[k], err, iErr, ((float *)gIn)[j], - ((float *)gIn2)[j], ((cl_uint *)gIn)[j], - ((cl_uint *)gIn2)[j], ((float *)gOut_Ref)[j], - ((int *)gOut_Ref2)[j], ((cl_uint *)gOut_Ref)[j], - ((cl_uint *)gOut_Ref2)[j], test, q2[j], - ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]); + vlog_error("\nERROR: %s%s: {%f, %" PRId64 + "} ulp error at {%a, %a} " + "({0x%8.8x, 0x%8.8x}): *{%a, %d} ({0x%8.8x, " + "0x%8.8x}) vs. {%a, %d} ({0x%8.8x, 0x%8.8x})\n", + f->name, sizeNames[k], err, iErr, + ((float *)gIn)[j], ((float *)gIn2)[j], + ((cl_uint *)gIn)[j], ((cl_uint *)gIn2)[j], + ((float *)gOut_Ref)[j], ((int *)gOut_Ref2)[j], + ((cl_uint *)gOut_Ref)[j], + ((cl_uint *)gOut_Ref2)[j], test, q2[j], + ((cl_uint *)&test)[0], ((cl_uint *)q2)[j]); error = -1; goto exit; } @@ -533,8 +535,9 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { @@ -551,8 +554,8 @@ int TestFunc_FloatI_Float_Float(const Func *f, MTdata d, bool relaxedMode) else vlog("passed"); - vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal, - maxErrorVal2); + vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2, + maxErrorVal, maxErrorVal2); } vlog("\n"); diff --git a/test_conformance/math_brute_force/i_unary_double.cpp b/test_conformance/math_brute_force/i_unary_double.cpp index a05737da..0cbcf86e 100644 --- a/test_conformance/math_brute_force/i_unary_double.cpp +++ b/test_conformance/math_brute_force/i_unary_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -271,8 +272,9 @@ int 
TestFunc_Int_Double(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/i_unary_float.cpp b/test_conformance/math_brute_force/i_unary_float.cpp index 13442e61..90bb1e16 100644 --- a/test_conformance/math_brute_force/i_unary_float.cpp +++ b/test_conformance/math_brute_force/i_unary_float.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -268,8 +269,9 @@ int TestFunc_Int_Float(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index b81766bd..412f210b 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -487,8 +488,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong err = t[j] - q[j]; if (q[j] > t[j]) err = q[j] - t[j]; - vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld " - "vs. %lld (index: %d)\n", + vlog_error("\nERROR: %s: %" PRId64 + " ulp error at {%.13la, %.13la}: *%" PRId64 " " + "vs. %" PRId64 " (index: %zu)\n", name, err, ((double *)s)[j], ((double *)s2)[j], t[j], q[j], j); error = -1; @@ -535,8 +537,9 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong err = -t[j] - q[j]; if (q[j] > -t[j]) err = q[j] + t[j]; - vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, " - "%.13la}: *%lld vs. 
%lld (index: %d)\n", + vlog_error("\nERROR: %sD%s: %" PRId64 " ulp error at {%.13la, " + "%.13la}: *%" PRId64 " vs. %" PRId64 + " (index: %zu)\n", name, sizeNames[k], err, ((double *)s)[j], ((double *)s2)[j], -t[j], q[j], j); error = -1; diff --git a/test_conformance/math_brute_force/macro_binary_float.cpp b/test_conformance/math_brute_force/macro_binary_float.cpp index 4a3fb67d..cb915fc7 100644 --- a/test_conformance/math_brute_force/macro_binary_float.cpp +++ b/test_conformance/math_brute_force/macro_binary_float.cpp @@ -478,7 +478,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) uint32_t err = t[j] - q[j]; if (q[j] > t[j]) err = q[j] - t[j]; vlog_error("\nERROR: %s: %d ulp error at {%a, %a}: *0x%8.8x vs. " - "0x%8.8x (index: %d)\n", + "0x%8.8x (index: %zu)\n", name, err, ((float *)s)[j], ((float *)s2)[j], t[j], q[j], j); error = -1; @@ -524,7 +524,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_uint err = -t[j] - q[j]; if (q[j] > -t[j]) err = q[j] + t[j]; vlog_error("\nERROR: %s%s: %d ulp error at {%a, %a}: *0x%8.8x " - "vs. 0x%8.8x (index: %d)\n", + "vs. 0x%8.8x (index: %zu)\n", name, sizeNames[k], err, ((float *)s)[j], ((float *)s2)[j], -t[j], q[j], j); error = -1; diff --git a/test_conformance/math_brute_force/macro_unary_double.cpp b/test_conformance/math_brute_force/macro_unary_double.cpp index 19cefee4..c2e7cdcc 100644 --- a/test_conformance/math_brute_force/macro_unary_double.cpp +++ b/test_conformance/math_brute_force/macro_unary_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -297,7 +298,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong err = t[j] - q[j]; if (q[j] > t[j]) err = q[j] - t[j]; - vlog_error("\nERROR: %sD: %zd ulp error at %.13la: *%zd vs. %zd\n", + vlog_error("\nERROR: %sD: %" PRId64 + " ulp error at %.13la: *%" PRId64 " vs. 
%" PRId64 "\n", name, err, ((double *)gIn)[j], t[j], q[j]); return -1; } @@ -323,7 +325,8 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong err = -t[j] - q[j]; if (q[j] > -t[j]) err = q[j] + t[j]; vlog_error( - "\nERROR: %sD%s: %zd ulp error at %.13la: *%zd vs. %zd\n", + "\nERROR: %sD%s: %" PRId64 " ulp error at %.13la: *%" PRId64 + " vs. %" PRId64 "\n", name, sizeNames[k], err, ((double *)gIn)[j], -t[j], q[j]); return -1; } diff --git a/test_conformance/math_brute_force/main.cpp b/test_conformance/math_brute_force/main.cpp index 59960a85..64491bd4 100644 --- a/test_conformance/math_brute_force/main.cpp +++ b/test_conformance/math_brute_force/main.cpp @@ -132,7 +132,7 @@ static int doTest(const char *name) if ((gStartTestNumber != ~0u && i < gStartTestNumber) || i > gEndTestNumber) { - vlog("Skipping function #%d\n", i); + vlog("Skipping function #%zu\n", i); return 0; } diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index 94fbe268..a7fa5625 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #define CORRECTLY_ROUNDED 0 @@ -708,8 +709,9 @@ int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d, { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index 762c57de..3b8c2c3b 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #define CORRECTLY_ROUNDED 0 @@ -843,8 +844,8 @@ int 
TestFunc_Float_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp index 3deac57c..177cfe5b 100644 --- a/test_conformance/math_brute_force/unary_double.cpp +++ b/test_conformance/math_brute_force/unary_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -345,7 +346,7 @@ cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (fail) { vlog_error("\nERROR: %s%s: %f ulp error at %.13la " - "(0x%16.16llx): *%.13la vs. %.13la\n", + "(0x%16.16" PRIx64 "): *%.13la vs. %.13la\n", job->f->name, sizeNames[k], err, ((cl_double *)gIn)[j], ((cl_ulong *)gIn)[j], ((cl_double *)gOut_Ref)[j], test); diff --git a/test_conformance/math_brute_force/unary_two_results_double.cpp b/test_conformance/math_brute_force/unary_two_results_double.cpp index 858b2c35..6d7c61d6 100644 --- a/test_conformance/math_brute_force/unary_two_results_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -414,8 +415,9 @@ int TestFunc_Double2_Double(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/unary_two_results_float.cpp b/test_conformance/math_brute_force/unary_two_results_float.cpp index 85e5d014..42e858c4 100644 --- a/test_conformance/math_brute_force/unary_two_results_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_float.cpp 
@@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -546,8 +547,9 @@ int TestFunc_Float2_Float(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/unary_two_results_i_double.cpp b/test_conformance/math_brute_force/unary_two_results_i_double.cpp index 4cfbca9c..8b751944 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_double.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #include @@ -386,8 +387,9 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { @@ -404,8 +406,8 @@ int TestFunc_DoubleI_Double(const Func *f, MTdata d, bool relaxedMode) else vlog("passed"); - vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal, - maxErrorVal2); + vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2, + maxErrorVal, maxErrorVal2); } vlog("\n"); diff --git a/test_conformance/math_brute_force/unary_two_results_i_float.cpp b/test_conformance/math_brute_force/unary_two_results_i_float.cpp index e324ad09..54843a29 100644 --- a/test_conformance/math_brute_force/unary_two_results_i_float.cpp +++ b/test_conformance/math_brute_force/unary_two_results_i_float.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include #include @@ -384,8 +385,9 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, 
step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { @@ -402,8 +404,8 @@ int TestFunc_FloatI_Float(const Func *f, MTdata d, bool relaxedMode) else vlog("passed"); - vlog("\t{%8.2f, %lld} @ {%a, %a}", maxError, maxError2, maxErrorVal, - maxErrorVal2); + vlog("\t{%8.2f, %" PRId64 "} @ {%a, %a}", maxError, maxError2, + maxErrorVal, maxErrorVal2); } vlog("\n"); diff --git a/test_conformance/math_brute_force/unary_u_double.cpp b/test_conformance/math_brute_force/unary_u_double.cpp index a0c6b793..9b60904a 100644 --- a/test_conformance/math_brute_force/unary_u_double.cpp +++ b/test_conformance/math_brute_force/unary_u_double.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -267,11 +268,11 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) } if (fail) { - vlog_error("\n%s%sD: %f ulp error at 0x%16.16llx: " - "*%.13la vs. %.13la\n", - f->name, sizeNames[k], err, - ((uint64_t *)gIn)[j], - ((double *)gOut_Ref)[j], test); + vlog_error( + "\n%s%sD: %f ulp error at 0x%16.16" PRIx64 ": " + "*%.13la vs. 
%.13la\n", + f->name, sizeNames[k], err, ((uint64_t *)gIn)[j], + ((double *)gOut_Ref)[j], test); error = -1; goto exit; } @@ -283,8 +284,9 @@ int TestFunc_Double_ULong(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { diff --git a/test_conformance/math_brute_force/unary_u_float.cpp b/test_conformance/math_brute_force/unary_u_float.cpp index ccfbc3be..b67a9bda 100644 --- a/test_conformance/math_brute_force/unary_u_float.cpp +++ b/test_conformance/math_brute_force/unary_u_float.cpp @@ -19,6 +19,7 @@ #include "test_functions.h" #include "utility.h" +#include #include namespace { @@ -285,8 +286,9 @@ int TestFunc_Float_UInt(const Func *f, MTdata d, bool relaxedMode) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step, - BUFFER_SIZE); + vlog("base:%14" PRIu64 " step:%10" PRIu64 + " bufferSize:%10d \n", + i, step, BUFFER_SIZE); } else { -- cgit v1.2.3 From a3294d4c9542df3329fcf528429b7b1285cf4ebf Mon Sep 17 00:00:00 2001 From: victzhan <111778801+victzhan@users.noreply.github.com> Date: Mon, 3 Oct 2022 09:26:43 -0400 Subject: Add Python 3 support to run_conformance.py (#1470) --- test_conformance/run_conformance.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/test_conformance/run_conformance.py b/test_conformance/run_conformance.py index bb8f86ff..974491e1 100755 --- a/test_conformance/run_conformance.py +++ b/test_conformance/run_conformance.py @@ -16,7 +16,6 @@ import sys import subprocess import time import tempfile -import string DEBUG = 0 @@ -27,7 +26,6 @@ process_pid = 0 # to the screen while the tests are running. 
seconds_between_status_updates = 60 * 60 * 24 * 7 # effectively never - # Help info def write_help_info(): print("run_conformance.py test_list [CL_DEVICE_TYPE(s) to test] [partial-test-names, ...] [log=path/to/log/file/]") @@ -66,16 +64,16 @@ def get_tests(filename, devices_to_test): device_specific_match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*,\s*(.+?)\s*$", line) if device_specific_match: if device_specific_match.group(1) in devices_to_test: - test_path = string.replace(device_specific_match.group(3), '/', os.sep) - test_name = string.replace(device_specific_match.group(2), '/', os.sep) + test_path = str.replace(device_specific_match.group(3), '/', os.sep) + test_name = str.replace(device_specific_match.group(2), '/', os.sep) tests.append((test_name, test_path)) else: print("Skipping " + device_specific_match.group(2) + " because " + device_specific_match.group(1) + " is not in the list of devices to test.") continue match = re.search("^\s*(.+?)\s*,\s*(.+?)\s*$", line) if match: - test_path = string.replace(match.group(2), '/', os.sep) - test_name = string.replace(match.group(1), '/', os.sep) + test_path = str.replace(match.group(2), '/', os.sep) + test_name = str.replace(match.group(1), '/', os.sep) tests.append((test_name, test_path)) return tests @@ -243,7 +241,10 @@ def run_tests(tests): # Catch an interrupt from the user write_screen_log("\nFAILED: Execution interrupted. Killing test process, but not aborting full test run.") os.kill(process_pid, 9) - answer = raw_input("Abort all tests? (y/n)") + if sys.version_info[0] < 3: + answer = raw_input("Abort all tests? (y/n)") + else: + answer = input("Abort all tests? 
(y/n)") if answer.find("y") != -1: write_screen_log("\nUser chose to abort all tests.") log_file.close() -- cgit v1.2.3 From 28e76e532a60dbe334bb3bcd729ef715adc8b1dc Mon Sep 17 00:00:00 2001 From: Jack Frankland <30410009+FranklandJack@users.noreply.github.com> Date: Mon, 3 Oct 2022 14:32:11 +0100 Subject: Add missing type declaration (#1520) Add a missing type declaration to OpenCL C code strings in 2D async copy tests. --- test_conformance/basic/test_async_copy2D.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/basic/test_async_copy2D.cpp b/test_conformance/basic/test_async_copy2D.cpp index 54633a31..bf3f1552 100644 --- a/test_conformance/basic/test_async_copy2D.cpp +++ b/test_conformance/basic/test_async_copy2D.cpp @@ -53,7 +53,7 @@ __kernel void test_fn(const __global %s *src, __global %s *dst, for (int i = 0; i < lineCopiesPerWorkItem; i++) { for (int j = 0; j < numElementsPerLine; j++) { - const local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j; + const int local_index = (get_local_id(0) * lineCopiesPerWorkItem + i) * dstStride + j; const int global_index = (get_global_id(0) * lineCopiesPerWorkItem + i) * dstStride + j; dst[global_index] = localBuffer[local_index]; } -- cgit v1.2.3 From 18825769e5e34dae0eefeae9ba19e3ac5af9a4d8 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 3 Oct 2022 22:09:05 +0100 Subject: pipes: Fix typos in skip messages (#1523) Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/pipes/test_pipe_read_write.cpp | 42 ++++++++++++++++--------- test_conformance/pipes/test_pipe_subgroups.cpp | 5 ++- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp index a502e03e..cb72e96b 100644 --- a/test_conformance/pipes/test_pipe_read_write.cpp +++ b/test_conformance/pipes/test_pipe_read_write.cpp @@ -1075,7 +1075,8 @@ int 
test_pipe_readwrite_half( cl_device_id deviceID, cl_context context, cl_comm if(!is_extension_available(deviceID, "cl_khr_fp16")) { - log_info("cl_khr_fp16 is not supported on this platoform. Skipping test.\n"); + log_info( + "cl_khr_fp16 is not supported on this platform. Skipping test.\n"); return CL_SUCCESS; } ptrSizes[0] = sizeof(cl_float) / 2; @@ -1256,7 +1257,8 @@ int test_pipe_readwrite_double( cl_device_id deviceID, cl_context context, cl_co //skip devices that don't support double if(!is_extension_available(deviceID, "cl_khr_fp64")) { - log_info("cl_khr_fp64 is not supported on this platoform. Skipping test.\n"); + log_info( + "cl_khr_fp64 is not supported on this platform. Skipping test.\n"); return CL_SUCCESS; } @@ -1403,7 +1405,8 @@ int test_pipe_subgroup_readwrite_int( cl_device_id deviceID, cl_context context, if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_int(deviceID, context, queue, num_elements); @@ -1417,7 +1420,8 @@ int test_pipe_subgroup_readwrite_uint( cl_device_id deviceID, cl_context context if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_uint(deviceID, context, queue, num_elements); @@ -1431,7 +1435,8 @@ int test_pipe_subgroup_readwrite_short( cl_device_id deviceID, cl_context contex if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. 
Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_short(deviceID, context, queue, num_elements); @@ -1445,7 +1450,8 @@ int test_pipe_subgroup_readwrite_ushort( cl_device_id deviceID, cl_context conte if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_ushort(deviceID, context, queue, num_elements); @@ -1459,7 +1465,8 @@ int test_pipe_subgroup_readwrite_char( cl_device_id deviceID, cl_context context if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_char(deviceID, context, queue, num_elements); @@ -1473,7 +1480,8 @@ int test_pipe_subgroup_readwrite_uchar( cl_device_id deviceID, cl_context contex if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_uchar(deviceID, context, queue, num_elements); @@ -1488,7 +1496,8 @@ int test_pipe_subgroup_readwrite_float( cl_device_id deviceID, cl_context contex if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. 
Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_float(deviceID, context, queue, num_elements); @@ -1502,7 +1511,8 @@ int test_pipe_subgroup_readwrite_half( cl_device_id deviceID, cl_context context if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_half(deviceID, context, queue, num_elements); @@ -1516,7 +1526,8 @@ int test_pipe_subgroup_readwrite_long( cl_device_id deviceID, cl_context context if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_long(deviceID, context, queue, num_elements); @@ -1530,7 +1541,8 @@ int test_pipe_subgroup_readwrite_ulong( cl_device_id deviceID, cl_context contex if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_ulong(deviceID, context, queue, num_elements); @@ -1544,7 +1556,8 @@ int test_pipe_subgroup_readwrite_double( cl_device_id deviceID, cl_context conte if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. 
Skipping " + "test.\n"); return CL_SUCCESS; } return test_pipe_readwrite_double(deviceID, context, queue, num_elements); @@ -1554,7 +1567,8 @@ int test_pipe_subgroup_readwrite_struct( cl_device_id deviceID, cl_context conte { if(!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info("cl_khr_subgroups is not supported on this platoform. Skipping test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } const char *kernelNames[] = {"test_pipe_subgroup_write_struct","test_pipe_subgroup_read_struct"}; diff --git a/test_conformance/pipes/test_pipe_subgroups.cpp b/test_conformance/pipes/test_pipe_subgroups.cpp index b3e17183..8e2f6e57 100644 --- a/test_conformance/pipes/test_pipe_subgroups.cpp +++ b/test_conformance/pipes/test_pipe_subgroups.cpp @@ -114,9 +114,8 @@ int test_pipe_subgroups_divergence(cl_device_id deviceID, cl_context context, cl if (!is_extension_available(deviceID, "cl_khr_subgroups")) { - log_info( - "cl_khr_subgroups is not supported on this platoform. Skipping " - "test.\n"); + log_info("cl_khr_subgroups is not supported on this platform. Skipping " + "test.\n"); return CL_SUCCESS; } -- cgit v1.2.3 From 30cc3db4ecdaccd0b5367ca3da2bda013fc6b81b Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Mon, 3 Oct 2022 22:09:28 +0100 Subject: atomics: Fix -Wformat warnings (#1519) The main sources of warnings were: * Printing of `i` which is a `size_t` requiring the `%zu` specifier. * Printing of `cl_long` which is now done using the `PRId64` macro to ensure portability across 32 and 64-bit builds. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/atomics/test_atomics.cpp | 37 ++++++++++++++++++------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/test_conformance/atomics/test_atomics.cpp b/test_conformance/atomics/test_atomics.cpp index 31d08500..caa4b78f 100644 --- a/test_conformance/atomics/test_atomics.cpp +++ b/test_conformance/atomics/test_atomics.cpp @@ -19,6 +19,8 @@ #include #endif +#include + #define INT_TEST_VALUE 402258822 #define LONG_TEST_VALUE 515154531254381446LL @@ -359,7 +361,7 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, if (typeSize == 4) { cl_int *outValue = (cl_int *)(destItems + i * typeSize); - log_error("ERROR: Result %ld from kernel does not " + log_error("ERROR: Result %zu from kernel does not " "validate! (should be %d, was %d)\n", i, intVal, *outValue); cl_int *startRefs = (cl_int *)startRefValues; @@ -367,27 +369,28 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, for (i = 0; i < threadSize; i++) { if (startRefs != NULL) - log_info(" --- %ld - %d --- %d\n", i, startRefs[i], + log_info(" --- %zu - %d --- %d\n", i, startRefs[i], refs[i]); else - log_info(" --- %ld --- %d\n", i, refs[i]); + log_info(" --- %zu --- %d\n", i, refs[i]); } } else { cl_long *outValue = (cl_long *)(destItems + i * typeSize); - log_error("ERROR: Result %ld from kernel does not " - "validate! (should be %lld, was %lld)\n", + log_error("ERROR: Result %zu from kernel does not " + "validate! 
(should be %" PRId64 ", was %" PRId64 + ")\n", i, longVal, *outValue); cl_long *startRefs = (cl_long *)startRefValues; cl_long *refs = (cl_long *)refValues; for (i = 0; i < threadSize; i++) { if (startRefs != NULL) - log_info(" --- %ld - %lld --- %lld\n", i, - startRefs[i], refs[i]); + log_info(" --- %zu - %" PRId64 " --- %" PRId64 "\n", + i, startRefs[i], refs[i]); else - log_info(" --- %ld --- %lld\n", i, refs[i]); + log_info(" --- %zu --- %" PRId64 "\n", i, refs[i]); } } return -1; @@ -476,7 +479,8 @@ int test_atomic_function(cl_device_id deviceID, cl_context context, cl_long *r = (cl_long *)refValues; log_error("ERROR: atomic function operated correctly but did NOT " "return correct 'old' value " - " (should have been %lld, returned %lld)!\n", + " (should have been %" PRId64 ", returned %" PRId64 + ")!\n", *s, *r); } return -1; @@ -673,7 +677,7 @@ bool test_atomic_xchg_verify_int(size_t size, cl_int *refValues, if (refValues[i] < 0 || (size_t)refValues[i] >= size) { log_error( - "ERROR: Reference value %ld outside of valid range! (%d)\n", i, + "ERROR: Reference value %zu outside of valid range! (%d)\n", i, refValues[i]); return false; } @@ -702,7 +706,7 @@ bool test_atomic_xchg_verify_int(size_t size, cl_int *refValues, { if (valids[i] != 1) { - log_error("ERROR: Reference value %ld did not occur " + log_error("ERROR: Reference value %zu did not occur " "once-and-only-once (occurred %d)\n", i, valids[i]); for (size_t j = 0; j < size; j++) @@ -738,7 +742,8 @@ bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues, if (refValues[i] < 0 || (size_t)refValues[i] >= size) { log_error( - "ERROR: Reference value %ld outside of valid range! (%lld)\n", + "ERROR: Reference value %zu outside of valid range! 
(%" PRId64 + ")\n", i, refValues[i]); return false; } @@ -749,7 +754,7 @@ bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues, executed, because that value should be the final value outputted */ if (valids[finalValue] > 0) { - log_error("ERROR: Final value %lld was also in ref list!\n", + log_error("ERROR: Final value %" PRId64 " was also in ref list!\n", finalValue); return false; } @@ -768,7 +773,7 @@ bool test_atomic_xchg_verify_long(size_t size, cl_long *refValues, { if (valids[i] != 1) { - log_error("ERROR: Reference value %ld did not occur " + log_error("ERROR: Reference value %zu did not occur " "once-and-only-once (occurred %d)\n", i, valids[i]); for (size_t j = 0; j < size; j++) @@ -805,7 +810,7 @@ bool test_atomic_xchg_verify_float(size_t size, cl_float *refValues, if (refValues[i] < 0 || (size_t)refValues[i] >= size) { log_error( - "ERROR: Reference value %ld outside of valid range! (%a)\n", i, + "ERROR: Reference value %zu outside of valid range! (%a)\n", i, refValues[i]); return false; } @@ -834,7 +839,7 @@ bool test_atomic_xchg_verify_float(size_t size, cl_float *refValues, { if (valids[i] != 1) { - log_error("ERROR: Reference value %ld did not occur " + log_error("ERROR: Reference value %zu did not occur " "once-and-only-once (occurred %d)\n", i, valids[i]); for (size_t j = 0; j < size; j++) -- cgit v1.2.3 From dbd33bc9cfd2ace62445a812a6aabb901c2f7e74 Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 4 Oct 2022 21:30:03 +0530 Subject: External sharing new updates (#1482) * Fix enqueue_flags test to use correct barrier type. Currently, enqueue_flags test uses CLK_LOCAL_MEM_FENCE. Use CLK_GLOBAL_MEM_FENCE instead as all threads across work-groups need to wait here. * Add check for support for Read-Wrie images Read-Write images have required OpenCL 2.x. Read-Write image tests are already being skipped for 1.x devices. 
With OpenCL 3.0, read-write images being optional, the tests should be run or skipped depending on the implementation support. Add a check to decide if Read-Write images are supported or required to be supported depending on OpenCL version and decide if the tests should be run or skipped. Fixes issue #894 * Fix formatting in case of Read-Write image checks. Fix formatting in case of Read-write image checks. Also, combine two ifs into one in case of kerne_read_write tests * Fix some more formatting for RW-image checks Remove unnecessary spaces at various places. Also, fix lengthy lines. * Fix malloc-size calculation in test imagedim unsigned char size is silently assumed to be 1 in imagedim test of test_basic. Pass sizeof(type) in malloc size calculation. Also, change loop variable from signed to unsigned. Add checks for null pointer for malloced memory. * Initial CTS for external sharing extensions Initial set of tests for below extensions with Vulkan as producer 1. cl_khr_external_memory 2. cl_khr_external_memory_win32 3. cl_khr_external_memory_opaque_fd 4. cl_khr_external_semaphore 5. cl_khr_external_semaphore_win32 6. cl_khr_external_semaphore_opaque_fd * Updates to external sharing CTS Updates to external sharing CTS 1. Fix some build issues to remove unnecessary, non-existent files 2. Add new tests for platform and device queries. 3. Some added checks for VK Support.
Return type of getHandle is defined differently based on win or linux builds. Use appropriate guards when using API at other places. While at it remove duplicate definition of ARRAY_SIZE. * Use ARRAY_SIZE in harness. Use already defined ARRAY_SIZE macro from test_harness. * Fix build issues for test_vulkan Fix build issues for test_vulkan 1. Add cl_ext.h in common files 2. Replace cl_mem_properties_khr with cl_mem_properties 3. Replace cl_external_mem_handle_type_khr with cl_external_memory_handle_type_khr 4. Type-cast malloc as required. * Fix code formatting. Fix code formatting to get CTS CI builds clean. * Fix formatting fixes part-2 Another set of formatting fixes. * Fix code formatting part-3 Some more code formatting fixes. * Fix code formatting issues part-4 More code formatting fixes. * Formatting fixes part-5 Some more formatting fixes * Fix formatting part-6 More formatting fixes continued. * Code formatting fixes part-7 Code formatting fixes for image * Code formatting fixes part-8 Fixes for platform and device query tests. * Code formatting fixes part-9 More formatting fixes for vulkan_wrapper * Code formatting fixes part-10 More fixes to wrapper header * Code formatting fixes part-11 Formatting fixes for api_list * Code formatting fixes part-12 Formatting fixes for api_list_map. * Code formatting changes part-13 Code formatting changes for utility. * Code formatting fixes part-15 Formatting fixes for wrapper. * Misc Code formatting fixes Some more misc code formatting fixes. * Fix build breaks due to code formatting Fix build issues that arose with recent code formatting issues. * Fix presubmit script after merge Fix presubmit script after merge conflicts. * Fix Vulkan loader build in presubmit script. Use cmake ninja and appropriate toolchain for Vulkan loader dependency to fix linking issue on arm/aarch64. * Use static array sizes Use static array sizes to fix windows builds. * Some left-out formatting fixes. Fix remaining formatting issues. 
* Fix harness header path Fix harness header path While at it, remove Misc and test pragma. * Add/Fix license information Add Khronos License info for test_vulkan. Replace Apple license with Khronos as applicable. * Fix headers for Mac OSX builds. Use appropriate headers for Mac OSX builds * Fix Mac OSX builds. Use appropriate headers for Mac OSX builds. Also, fix some build issues due to type-casting. * Fix new code formatting issues Fix new code formatting issues with recent MacOS fixes. * Add back missing case statement Add back missing case statement that was accidentally removed. * Disable USE_GAS for Vulkan Loader build. Disable USE_GAS for Vulkan Loader build to fix aarch64 build. * Fixes to OpenCL external sharing tests Fix clReleaseSemaphore() API. Fix copyright year. Some other minor fixes. * Improvements to OpenCL external sharing CTS Use SPIR-V shaders instead of NV extension path from GLSL to Vulkan shaders. Fixes for lower end GPUs to use limited memory. Update copy-right year at some more places. * Fix new code formatting issues. Fix code formatting issues with recent changes for external sharing tests. * More formatting fixes. More formatting fixes for recent updates to external sharing tests. * Final code formatting fixes. Minor formatting fixes to get format checks clean. 
--- test_conformance/vulkan/main.cpp | 4 +- test_conformance/vulkan/shaders/buffer.comp | 28 ++++ test_conformance/vulkan/shaders/buffer.spv | Bin 0 -> 2168 bytes test_conformance/vulkan/shaders/image2D.comp | 31 +++++ test_conformance/vulkan/shaders/image2D_r16i.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_r16ui.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_r32f.spv | Bin 0 -> 3268 bytes test_conformance/vulkan/shaders/image2D_r32i.spv | Bin 0 -> 3256 bytes test_conformance/vulkan/shaders/image2D_r32ui.spv | Bin 0 -> 3256 bytes test_conformance/vulkan/shaders/image2D_r8i.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_r8ui.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg16i.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg16ui.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg32f.spv | Bin 0 -> 3276 bytes test_conformance/vulkan/shaders/image2D_rg32i.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg32ui.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg8i.spv | Bin 0 -> 3264 bytes test_conformance/vulkan/shaders/image2D_rg8ui.spv | Bin 0 -> 3264 bytes .../vulkan/shaders/image2D_rgba16i.spv | Bin 0 -> 3256 bytes .../vulkan/shaders/image2D_rgba16ui.spv | Bin 0 -> 3256 bytes .../vulkan/shaders/image2D_rgba32f.spv | Bin 0 -> 3268 bytes .../vulkan/shaders/image2D_rgba32i.spv | Bin 0 -> 3256 bytes .../vulkan/shaders/image2D_rgba32ui.spv | Bin 0 -> 3256 bytes test_conformance/vulkan/shaders/image2D_rgba8i.spv | Bin 0 -> 3256 bytes .../vulkan/shaders/image2D_rgba8ui.spv | Bin 0 -> 3256 bytes .../vulkan/test_vulkan_api_consistency.cpp | 14 +- .../vulkan/test_vulkan_interop_buffer.cpp | 36 +---- .../vulkan/test_vulkan_interop_image.cpp | 146 +++++++-------------- .../opencl_vulkan_wrapper.cpp | 51 +++++-- .../opencl_vulkan_wrapper.hpp | 6 +- .../vulkan_interop_common/vulkan_list_map.hpp | 7 +- 
.../vulkan_interop_common/vulkan_utility.cpp | 105 ++++++++------- .../vulkan_interop_common/vulkan_utility.hpp | 1 + .../vulkan_interop_common/vulkan_wrapper.cpp | 14 +- .../vulkan_interop_common/vulkan_wrapper.hpp | 3 +- 35 files changed, 232 insertions(+), 214 deletions(-) create mode 100644 test_conformance/vulkan/shaders/buffer.comp create mode 100644 test_conformance/vulkan/shaders/buffer.spv create mode 100644 test_conformance/vulkan/shaders/image2D.comp create mode 100644 test_conformance/vulkan/shaders/image2D_r16i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r16ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r32f.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r32i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r32ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r8i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_r8ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg16i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg16ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg32f.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg32i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg32ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg8i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rg8ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba16i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba16ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba32f.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba32i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba32ui.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba8i.spv create mode 100644 test_conformance/vulkan/shaders/image2D_rgba8ui.spv diff --git a/test_conformance/vulkan/main.cpp 
b/test_conformance/vulkan/main.cpp index 6cbde5cc..2eeb0c36 100644 --- a/test_conformance/vulkan/main.cpp +++ b/test_conformance/vulkan/main.cpp @@ -134,7 +134,6 @@ cl_device_id *devices; const size_t bufsize = BUFFERSIZE; char buf[BUFFERSIZE]; cl_uchar uuid[CL_UUID_SIZE_KHR]; -VulkanDevice vkDevice; unsigned int numCQ; bool multiImport; bool multiCtx; @@ -220,9 +219,12 @@ int main(int argc, const char *argv[]) if (!checkVkSupport()) { log_info("Vulkan supported GPU not found \n"); + log_info("TEST SKIPPED \n"); return 0; } + VulkanDevice vkDevice; + cl_device_type requestedDeviceType = CL_DEVICE_TYPE_GPU; char *force_cpu = getenv("CL_DEVICE_TYPE"); if (force_cpu != NULL) diff --git a/test_conformance/vulkan/shaders/buffer.comp b/test_conformance/vulkan/shaders/buffer.comp new file mode 100644 index 00000000..d8756f92 --- /dev/null +++ b/test_conformance/vulkan/shaders/buffer.comp @@ -0,0 +1,28 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable +#extension GL_EXT_shader_explicit_arithmetic_types_int8 : enable +#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable + +#define MAX_BUFFERS 5 + +layout(binding = 0) buffer Params +{ + uint32_t numBuffers; + uint32_t bufferSize; + uint32_t interBufferOffset; +}; +layout(binding = 1) buffer Buffer +{ + uint8_t ptr[]; +} bufferPtrList[MAX_BUFFERS]; +layout(local_size_x = 512) in; +void main() { + for (uint32_t bufIdx = 0; bufIdx < numBuffers; bufIdx++) { + uint32_t ptrIdx = gl_GlobalInvocationID.x; + uint32_t limit = bufferSize; + while (ptrIdx < limit) { + bufferPtrList[bufIdx].ptr[ptrIdx]++; + ptrIdx += (gl_NumWorkGroups.x * gl_WorkGroupSize.x); + } + } +} \ No newline at end of file diff --git a/test_conformance/vulkan/shaders/buffer.spv b/test_conformance/vulkan/shaders/buffer.spv new file mode 100644 index 00000000..685523ba Binary files /dev/null and b/test_conformance/vulkan/shaders/buffer.spv differ diff --git a/test_conformance/vulkan/shaders/image2D.comp 
b/test_conformance/vulkan/shaders/image2D.comp new file mode 100644 index 00000000..42fa2f73 --- /dev/null +++ b/test_conformance/vulkan/shaders/image2D.comp @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable +#extension GL_EXT_shader_explicit_arithmetic_types_int32 : enable + +#define MAX_2D_IMAGES 5 +#define MAX_2D_IMAGE_MIP_LEVELS 11 +#define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES * MAX_2D_IMAGE_MIP_LEVELS + +layout(binding = 0) buffer Params +{ + uint32_t numImage2DDescriptors; +}; +layout(binding = 1, rgba32f ) uniform image2D image2DList[ MAX_2D_IMAGE_DESCRIPTORS ]; +layout(local_size_x = 32, local_size_y = 32) in; +void main() { + uvec3 numThreads = gl_NumWorkGroups * gl_WorkGroupSize; + for (uint32_t image2DIdx = 0; image2DIdx < numImage2DDescriptors; image2DIdx++) { + ivec2 imageDim = imageSize(image2DList[image2DIdx]); + uint32_t heightBy2 = imageDim.y / 2; + for (uint32_t row = gl_GlobalInvocationID.y; row < heightBy2; row += numThreads.y) { + for (uint32_t col = gl_GlobalInvocationID.x; col < imageDim.x; col += numThreads.x) { + ivec2 coordsA = ivec2(col, row); + ivec2 coordsB = ivec2(col, imageDim.y - row - 1); + vec4 dataA = imageLoad(image2DList[image2DIdx], coordsA); + vec4 dataB = imageLoad(image2DList[image2DIdx], coordsB); + imageStore(image2DList[image2DIdx], coordsA, dataB); + imageStore(image2DList[image2DIdx], coordsB, dataA); + } + } + } +} \ No newline at end of file diff --git a/test_conformance/vulkan/shaders/image2D_r16i.spv b/test_conformance/vulkan/shaders/image2D_r16i.spv new file mode 100644 index 00000000..00c5c283 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r16i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r16ui.spv b/test_conformance/vulkan/shaders/image2D_r16ui.spv new file mode 100644 index 00000000..87514d9f Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r16ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r32f.spv 
b/test_conformance/vulkan/shaders/image2D_r32f.spv new file mode 100644 index 00000000..e82c9c19 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r32f.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r32i.spv b/test_conformance/vulkan/shaders/image2D_r32i.spv new file mode 100644 index 00000000..7ea8d26f Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r32i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r32ui.spv b/test_conformance/vulkan/shaders/image2D_r32ui.spv new file mode 100644 index 00000000..dbcdbc5f Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r32ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r8i.spv b/test_conformance/vulkan/shaders/image2D_r8i.spv new file mode 100644 index 00000000..1a641475 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r8i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_r8ui.spv b/test_conformance/vulkan/shaders/image2D_r8ui.spv new file mode 100644 index 00000000..a90ccf98 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_r8ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg16i.spv b/test_conformance/vulkan/shaders/image2D_rg16i.spv new file mode 100644 index 00000000..07996173 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg16i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg16ui.spv b/test_conformance/vulkan/shaders/image2D_rg16ui.spv new file mode 100644 index 00000000..f73e096b Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg16ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg32f.spv b/test_conformance/vulkan/shaders/image2D_rg32f.spv new file mode 100644 index 00000000..1489660e Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg32f.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg32i.spv 
b/test_conformance/vulkan/shaders/image2D_rg32i.spv new file mode 100644 index 00000000..b7d302f4 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg32i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg32ui.spv b/test_conformance/vulkan/shaders/image2D_rg32ui.spv new file mode 100644 index 00000000..6cf2f1b8 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg32ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg8i.spv b/test_conformance/vulkan/shaders/image2D_rg8i.spv new file mode 100644 index 00000000..a71b9bf0 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg8i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rg8ui.spv b/test_conformance/vulkan/shaders/image2D_rg8ui.spv new file mode 100644 index 00000000..2aca9290 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rg8ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba16i.spv b/test_conformance/vulkan/shaders/image2D_rgba16i.spv new file mode 100644 index 00000000..0cb95dfd Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba16i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba16ui.spv b/test_conformance/vulkan/shaders/image2D_rgba16ui.spv new file mode 100644 index 00000000..84c3d3db Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba16ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba32f.spv b/test_conformance/vulkan/shaders/image2D_rgba32f.spv new file mode 100644 index 00000000..35136c58 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba32f.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba32i.spv b/test_conformance/vulkan/shaders/image2D_rgba32i.spv new file mode 100644 index 00000000..4d1ae581 Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba32i.spv differ diff --git 
a/test_conformance/vulkan/shaders/image2D_rgba32ui.spv b/test_conformance/vulkan/shaders/image2D_rgba32ui.spv new file mode 100644 index 00000000..bed86f0c Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba32ui.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba8i.spv b/test_conformance/vulkan/shaders/image2D_rgba8i.spv new file mode 100644 index 00000000..edf8c58c Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba8i.spv differ diff --git a/test_conformance/vulkan/shaders/image2D_rgba8ui.spv b/test_conformance/vulkan/shaders/image2D_rgba8ui.spv new file mode 100644 index 00000000..bb9a770c Binary files /dev/null and b/test_conformance/vulkan/shaders/image2D_rgba8ui.spv differ diff --git a/test_conformance/vulkan/test_vulkan_api_consistency.cpp b/test_conformance/vulkan/test_vulkan_api_consistency.cpp index 2987418f..f22ac319 100644 --- a/test_conformance/vulkan/test_vulkan_api_consistency.cpp +++ b/test_conformance/vulkan/test_vulkan_api_consistency.cpp @@ -238,7 +238,7 @@ int test_consistency_external_image(cl_device_id deviceID, cl_context _context, const VulkanMemoryTypeList& memoryTypeList = vkImage2D->getMemoryTypeList(); uint64_t totalImageMemSize = vkImage2D->getSize(); - log_info("Memory type index: %d\n", (uint32_t)memoryTypeList[0]); + log_info("Memory type index: %lu\n", (uint32_t)memoryTypeList[0]); log_info("Memory type property: %d\n", memoryTypeList[0].getMemoryTypeProperty()); log_info("Image size : %d\n", totalImageMemSize); @@ -552,17 +552,17 @@ int test_consistency_external_semaphore(cl_device_id deviceID, // Pass invalid object to release call - errNum = clReleaseSemaphoreObjectKHRptr(NULL); + errNum = clReleaseSemaphoreKHRptr(NULL); test_failure_error(errNum, CL_INVALID_VALUE, - "clReleaseSemaphoreObjectKHRptr fails with " + "clReleaseSemaphoreKHRptr fails with " "CL_INVALID_VALUE when NULL semaphore object is passed"); // Release both semaphore objects - errNum = 
clReleaseSemaphoreObjectKHRptr(clVk2Clsemaphore); - test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed"); + errNum = clReleaseSemaphoreKHRptr(clVk2Clsemaphore); + test_error(errNum, "clReleaseSemaphoreKHRptr failed"); - errNum = clReleaseSemaphoreObjectKHRptr(clCl2Vksemaphore); - test_error(errNum, "clReleaseSemaphoreObjectKHRptr failed"); + errNum = clReleaseSemaphoreKHRptr(clCl2Vksemaphore); + test_error(errNum, "clReleaseSemaphoreKHRptr failed"); return TEST_PASS; } diff --git a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp index 7daf96de..9b0bc9de 100644 --- a/test_conformance/vulkan/test_vulkan_interop_buffer.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_buffer.cpp @@ -39,35 +39,6 @@ struct Params }; } -static const char *vkBufferShader = - "#version 450\n" - "#extension GL_ARB_separate_shader_objects : enable\n" - "#extension GL_NV_gpu_shader5 : enable\n" - "layout(binding = 0) buffer Params\n" - "{\n" - " uint32_t numBuffers;\n" - " uint32_t bufferSize;\n" - " uint32_t interBufferOffset;\n" - "};\n" - "layout(binding = 1) buffer Buffer\n" - "{\n" - " uint8_t ptr[];\n" - "} bufferPtrList[" STRING( - MAX_BUFFERS) "];\n" - "layout(local_size_x = 512) in;\n" - "void main() {\n" - " for (uint32_t bufIdx = 0; bufIdx < numBuffers;" - " bufIdx++) {\n" - " uint32_t ptrIdx = gl_GlobalInvocationID.x;\n" - " uint32_t limit = bufferSize;\n" - " while (ptrIdx < limit) {\n" - " bufferPtrList[bufIdx].ptr[ptrIdx]++;\n" - " ptrIdx += (gl_NumWorkGroups.x * " - "gl_WorkGroupSize.x);\n" - " }\n" - " }\n" - "}\n"; - const char *kernel_text_numbuffer_1 = " \ __kernel void clUpdateBuffer(int bufferSize, __global unsigned char *a) { \n\ int gid = get_global_id(0); \n\ @@ -149,6 +120,8 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanQueue &vkQueue = vkDevice.getQueue(); + std::vector vkBufferShader = readFile("buffer.spv"); + VulkanShaderModule 
vkBufferShaderModule(vkDevice, vkBufferShader); VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); @@ -446,6 +419,7 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, VulkanQueue &vkQueue = vkDevice.getQueue(); + std::vector vkBufferShader = readFile("buffer.spv"); VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); @@ -716,6 +690,8 @@ int run_test_with_multi_import_same_ctx( VulkanQueue &vkQueue = vkDevice.getQueue(); + std::vector vkBufferShader = readFile("buffer.spv"); + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); @@ -1050,6 +1026,8 @@ int run_test_with_multi_import_diff_ctx( VulkanQueue &vkQueue = vkDevice.getQueue(); + std::vector vkBufferShader = readFile("buffer.spv"); + VulkanShaderModule vkBufferShaderModule(vkDevice, vkBufferShader); VulkanDescriptorSetLayoutBindingList vkDescriptorSetLayoutBindingList( MAX_BUFFERS + 1, VULKAN_DESCRIPTOR_TYPE_STORAGE_BUFFER); diff --git a/test_conformance/vulkan/test_vulkan_interop_image.cpp b/test_conformance/vulkan/test_vulkan_interop_image.cpp index f1d0af1f..7577de09 100644 --- a/test_conformance/vulkan/test_vulkan_interop_image.cpp +++ b/test_conformance/vulkan/test_vulkan_interop_image.cpp @@ -25,8 +25,6 @@ #define MAX_2D_IMAGE_ELEMENT_SIZE 16 #define MAX_2D_IMAGE_MIP_LEVELS 11 #define MAX_2D_IMAGE_DESCRIPTORS MAX_2D_IMAGES *MAX_2D_IMAGE_MIP_LEVELS -#define GLSL_FORMAT_STRING "" -#define GLSL_TYPE_PREFIX_STRING "" #define NUM_THREADS_PER_GROUP_X 32 #define NUM_THREADS_PER_GROUP_Y 32 #define NUM_BLOCKS(size, blockSize) \ @@ -54,61 +52,8 @@ struct Params } static cl_uchar uuid[CL_UUID_SIZE_KHR]; static cl_device_id 
deviceId = NULL; - -static const char *vkImage2DShader = - "#version 450\n" - "#extension GL_ARB_separate_shader_objects : enable\n" - "#extension GL_NV_gpu_shader5 : enable\n" - "layout(binding = 0) buffer Params\n" - "{\n" - " uint32_t numImage2DDescriptors;\n" - "};\n" - "layout(binding = 1, " GLSL_FORMAT_STRING - ") uniform " GLSL_TYPE_PREFIX_STRING "image2D image2DList[" STRING( - MAX_2D_IMAGE_DESCRIPTORS) "];\n" - "layout(local_size_x = 32, local_size_y = " - "32) in;\n" - "void main() {\n" - " uvec3 numThreads = gl_NumWorkGroups * " - "gl_WorkGroupSize;\n" - " for (uint32_t image2DIdx = 0; " - "image2DIdx < numImage2DDescriptors; " - "image2DIdx++)" - " {\n" - " ivec2 imageDim = " - "imageSize(image2DList[image2DIdx]);\n" - " uint32_t heightBy2 = imageDim.y / " - "2;\n" - " for (uint32_t row = " - "gl_GlobalInvocationID.y; row < heightBy2; " - "row += numThreads.y)" - " {\n" - " for (uint32_t col = " - "gl_GlobalInvocationID.x; col < imageDim.x; " - "col += numThreads.x)" - " {\n" - " ivec2 coordsA = ivec2(col, " - "row);\n" - " ivec2 coordsB = ivec2(col, " - "imageDim.y - row - 1);\n" - " " GLSL_TYPE_PREFIX_STRING - "vec4 dataA = " - "imageLoad(image2DList[image2DIdx], " - "coordsA);\n" - " " GLSL_TYPE_PREFIX_STRING - "vec4 dataB = " - "imageLoad(image2DList[image2DIdx], " - "coordsB);\n" - " " - "imageStore(image2DList[image2DIdx], " - "coordsA, dataB);\n" - " " - "imageStore(image2DList[image2DIdx], " - "coordsB, dataA);\n" - " }\n" - " }\n" - " }\n" - "}\n"; +size_t max_width = MAX_2D_IMAGE_WIDTH; +size_t max_height = MAX_2D_IMAGE_HEIGHT; const char *kernel_text_numImage_1 = " \ __constant sampler_t smpImg = CLK_NORMALIZED_COORDS_FALSE|CLK_ADDRESS_NONE|CLK_FILTER_NEAREST;\n\ @@ -268,8 +213,8 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); - uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT - * 
MAX_2D_IMAGE_ELEMENT_SIZE * 2; + uint64_t maxImage2DSize = + max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2; VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize); VulkanDeviceMemory vkSrcBufferDeviceMemory( vkDevice, vkSrcBuffer.getSize(), @@ -310,6 +255,12 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, clCl2VkExternalSemaphore = new clExternalSemaphore( vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + std::vector vkNonDedicatedImage2DListDeviceMemory1; + std::vector vkNonDedicatedImage2DListDeviceMemory2; + std::vector nonDedicatedExternalMemory1; + std::vector nonDedicatedExternalMemory2; + std::vector vkImage2DShader; + for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) { VulkanFormat vkFormat = vkFormatList[fIdx]; @@ -317,15 +268,13 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, uint32_t elementSize = getVulkanFormatElementSize(vkFormat); ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE); log_info("elementSize= %d\n", elementSize); - std::map patternToSubstituteMap; - patternToSubstituteMap[GLSL_FORMAT_STRING] = - getVulkanFormatGLSLFormat(vkFormat); - patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] = - getVulkanFormatGLSLTypePrefix(vkFormat); - - VulkanShaderModule vkImage2DShaderModule( - vkDevice, - prepareVulkanShader(vkImage2DShader, patternToSubstituteMap)); + + std::string fileName = "image2D_" + + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv"; + log_info("Load %s file", fileName.c_str()); + vkImage2DShader = readFile(fileName); + VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, vkImage2DShaderModule); @@ -333,13 +282,13 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, { uint32_t width = widthList[wIdx]; log_info("Width: %d\n", width); - ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH); + if (width > 
max_width) continue; region[0] = width; for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++) { uint32_t height = heightList[hIdx]; log_info("Height: %d", height); - ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT); + if (height > max_height) continue; region[1] = height; uint32_t numMipLevels = 1; @@ -418,14 +367,6 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, const VulkanMemoryTypeList &memoryTypeList = vkDummyImage2D.getMemoryTypeList(); - std::vector - vkNonDedicatedImage2DListDeviceMemory1; - std::vector - vkNonDedicatedImage2DListDeviceMemory2; - std::vector - nonDedicatedExternalMemory1; - std::vector - nonDedicatedExternalMemory2; for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) { @@ -834,6 +775,8 @@ int run_test_with_two_queue(cl_context &context, cl_command_queue &cmd_queue1, } } } + + vkImage2DShader.clear(); } CLEANUP: if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; @@ -866,8 +809,8 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, VULKAN_MEMORY_TYPE_PROPERTY_HOST_VISIBLE_COHERENT)); vkParamsDeviceMemory.bindBuffer(vkParamsBuffer); - uint64_t maxImage2DSize = MAX_2D_IMAGE_WIDTH * MAX_2D_IMAGE_HEIGHT - * MAX_2D_IMAGE_ELEMENT_SIZE * 2; + uint64_t maxImage2DSize = + max_width * max_height * MAX_2D_IMAGE_ELEMENT_SIZE * 2; VulkanBuffer vkSrcBuffer(vkDevice, maxImage2DSize); VulkanDeviceMemory vkSrcBufferDeviceMemory( vkDevice, vkSrcBuffer.getSize(), @@ -908,6 +851,12 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, clCl2VkExternalSemaphore = new clExternalSemaphore( vkCl2VkSemaphore, context, vkExternalSemaphoreHandleType, deviceId); + std::vector vkNonDedicatedImage2DListDeviceMemory1; + std::vector vkNonDedicatedImage2DListDeviceMemory2; + std::vector nonDedicatedExternalMemory1; + std::vector nonDedicatedExternalMemory2; + std::vector vkImage2DShader; + for (size_t fIdx = 0; fIdx < vkFormatList.size(); fIdx++) { 
VulkanFormat vkFormat = vkFormatList[fIdx]; @@ -915,15 +864,13 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, uint32_t elementSize = getVulkanFormatElementSize(vkFormat); ASSERT_LEQ(elementSize, (uint32_t)MAX_2D_IMAGE_ELEMENT_SIZE); log_info("elementSize= %d\n", elementSize); - std::map patternToSubstituteMap; - patternToSubstituteMap[GLSL_FORMAT_STRING] = - getVulkanFormatGLSLFormat(vkFormat); - patternToSubstituteMap[GLSL_TYPE_PREFIX_STRING] = - getVulkanFormatGLSLTypePrefix(vkFormat); - - VulkanShaderModule vkImage2DShaderModule( - vkDevice, - prepareVulkanShader(vkImage2DShader, patternToSubstituteMap)); + + std::string fileName = "image2D_" + + std::string(getVulkanFormatGLSLFormat(vkFormat)) + ".spv"; + log_info("Load %s file", fileName.c_str()); + vkImage2DShader = readFile(fileName); + VulkanShaderModule vkImage2DShaderModule(vkDevice, vkImage2DShader); + VulkanComputePipeline vkComputePipeline(vkDevice, vkPipelineLayout, vkImage2DShaderModule); @@ -931,13 +878,13 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, { uint32_t width = widthList[wIdx]; log_info("Width: %d\n", width); - ASSERT_LEQ(width, (uint32_t)MAX_2D_IMAGE_WIDTH); + if (width > max_width) continue; region[0] = width; for (size_t hIdx = 0; hIdx < ARRAY_SIZE(heightList); hIdx++) { uint32_t height = heightList[hIdx]; log_info("Height: %d\n", height); - ASSERT_LEQ(height, (uint32_t)MAX_2D_IMAGE_HEIGHT); + if (height > max_height) continue; region[1] = height; uint32_t numMipLevels = 1; @@ -1016,14 +963,6 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, const VulkanMemoryTypeList &memoryTypeList = vkDummyImage2D.getMemoryTypeList(); - std::vector - vkNonDedicatedImage2DListDeviceMemory1; - std::vector - vkNonDedicatedImage2DListDeviceMemory2; - std::vector - nonDedicatedExternalMemory1; - std::vector - nonDedicatedExternalMemory2; for (size_t mtIdx = 0; mtIdx < memoryTypeList.size(); mtIdx++) { @@ 
-1368,6 +1307,7 @@ int run_test_with_one_queue(cl_context &context, cl_command_queue &cmd_queue1, } } } + vkImage2DShader.clear(); } CLEANUP: if (clVk2CLExternalSemaphore) delete clVk2CLExternalSemaphore; @@ -1494,6 +1434,14 @@ int test_image_common(cl_device_id device_, cl_context context_, goto CLEANUP; } deviceId = devices[device_no]; + err = setMaxImageDimensions(deviceId, max_width, max_height); + if (CL_SUCCESS != err) + { + print_error(err, "error setting max image dimensions"); + goto CLEANUP; + } + log_info("Set max_width to %lu and max_height to %lu\n", max_width, + max_height); context = clCreateContextFromType(contextProperties, CL_DEVICE_TYPE_GPU, NULL, NULL, &err); if (CL_SUCCESS != err) diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp index 136818f6..9d9a6601 100644 --- a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp +++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.cpp @@ -23,6 +23,7 @@ #include #define ASSERT(x) assert((x)) +#define GB(x) ((unsigned long long)(x) << 30) pfnclCreateSemaphoreWithPropertiesKHR clCreateSemaphoreWithPropertiesKHRptr; pfnclEnqueueWaitSemaphoresKHR clEnqueueWaitSemaphoresKHRptr; @@ -31,7 +32,7 @@ pfnclEnqueueAcquireExternalMemObjectsKHR clEnqueueAcquireExternalMemObjectsKHRptr; pfnclEnqueueReleaseExternalMemObjectsKHR clEnqueueReleaseExternalMemObjectsKHRptr; -pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr; +pfnclReleaseSemaphoreKHR clReleaseSemaphoreKHRptr; void init_cl_vk_ext(cl_platform_id opencl_platform) { @@ -51,13 +52,13 @@ void init_cl_vk_ext(cl_platform_id opencl_platform) throw std::runtime_error("Failed to get the function pointer of " "clEnqueueSignalSemaphoresKHRptr!"); } - clReleaseSemaphoreObjectKHRptr = (pfnclReleaseSemaphoreObjectKHR) - clGetExtensionFunctionAddressForPlatform(opencl_platform, - "clReleaseSemaphoreObjectKHR"); - if 
(NULL == clReleaseSemaphoreObjectKHRptr) + clReleaseSemaphoreKHRptr = + (pfnclReleaseSemaphoreKHR)clGetExtensionFunctionAddressForPlatform( + opencl_platform, "clReleaseSemaphoreKHR"); + if (NULL == clReleaseSemaphoreKHRptr) { throw std::runtime_error("Failed to get the function pointer of " - "clReleaseSemaphoreObjectKHRptr!"); + "clReleaseSemaphoreKHRptr!"); } clCreateSemaphoreWithPropertiesKHRptr = (pfnclCreateSemaphoreWithPropertiesKHR) @@ -70,6 +71,40 @@ void init_cl_vk_ext(cl_platform_id opencl_platform) } } +cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &max_width, + size_t &max_height) +{ + cl_int result = CL_SUCCESS; + cl_ulong val; + size_t paramSize; + + result = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, + sizeof(cl_ulong), &val, ¶mSize); + + if (result != CL_SUCCESS) + { + return result; + } + + if (val < GB(4)) + { + max_width = 256; + max_height = 256; + } + else if (val < GB(8)) + { + max_width = 512; + max_height = 256; + } + else + { + max_width = 1024; + max_height = 512; + } + + return result; +} + cl_int getCLFormatFromVkFormat(VkFormat vkFormat, cl_image_format *clImageFormat) { @@ -798,10 +833,10 @@ clExternalSemaphore::clExternalSemaphore( clExternalSemaphore::~clExternalSemaphore() { - cl_int err = clReleaseSemaphoreObjectKHRptr(m_externalSemaphore); + cl_int err = clReleaseSemaphoreKHRptr(m_externalSemaphore); if (err != CL_SUCCESS) { - throw std::runtime_error("clReleaseSemaphoreObjectKHR failed!"); + throw std::runtime_error("clReleaseSemaphoreKHR failed!"); } } diff --git a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp index c1d2a766..d9f8dccb 100644 --- a/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp +++ b/test_conformance/vulkan/vulkan_interop_common/opencl_vulkan_wrapper.hpp @@ -49,7 +49,7 @@ typedef cl_int (*pfnclEnqueueReleaseExternalMemObjectsKHR)( cl_command_queue command_queue, 
cl_uint num_mem_objects, const cl_mem *mem_objects, cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); -typedef cl_int (*pfnclReleaseSemaphoreObjectKHR)(cl_semaphore_khr sema_object); +typedef cl_int (*pfnclReleaseSemaphoreKHR)(cl_semaphore_khr sema_object); extern pfnclCreateSemaphoreWithPropertiesKHR clCreateSemaphoreWithPropertiesKHRptr; @@ -59,7 +59,7 @@ extern pfnclEnqueueAcquireExternalMemObjectsKHR clEnqueueAcquireExternalMemObjectsKHRptr; extern pfnclEnqueueReleaseExternalMemObjectsKHR clEnqueueReleaseExternalMemObjectsKHRptr; -extern pfnclReleaseSemaphoreObjectKHR clReleaseSemaphoreObjectKHRptr; +extern pfnclReleaseSemaphoreKHR clReleaseSemaphoreKHRptr; cl_int getCLImageInfoFromVkImageInfo(const VkImageCreateInfo *, size_t, cl_image_format *, cl_image_desc *); @@ -69,6 +69,8 @@ cl_int check_external_memory_handle_type( cl_int check_external_semaphore_handle_type( cl_device_id deviceID, cl_external_semaphore_handle_type_khr requiredHandleType); +cl_int setMaxImageDimensions(cl_device_id deviceID, size_t &width, + size_t &height); class clExternalMemory { protected: diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp index 831403e1..10a7b221 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp @@ -335,11 +335,8 @@ const VulkanWrapper & template VulkanWrapper &VulkanList::operator[](size_t idx) { - if (idx < m_wrapperList.size()) - { - // CHECK_LT(idx, m_wrapperList.size()); - return m_wrapperList[idx].get(); - } + // CHECK_LT(idx, m_wrapperList.size()); + return m_wrapperList[idx].get(); } template diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp index 81e12621..4e6118b1 100644 --- 
a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp @@ -18,6 +18,7 @@ #include "vulkan_wrapper.hpp" #include #include +#include #include #include #include @@ -541,59 +542,6 @@ const char *getVulkanFormatGLSLFormat(VulkanFormat format) return (const char *)size_t(0); } -const char *getVulkanFormatGLSLTypePrefix(VulkanFormat format) -{ - switch (format) - { - case VULKAN_FORMAT_R8_UINT: - case VULKAN_FORMAT_R8G8_UINT: - case VULKAN_FORMAT_R8G8B8A8_UINT: - case VULKAN_FORMAT_R16_UINT: - case VULKAN_FORMAT_R16G16_UINT: - case VULKAN_FORMAT_R16G16B16A16_UINT: - case VULKAN_FORMAT_R32_UINT: - case VULKAN_FORMAT_R32G32_UINT: - case VULKAN_FORMAT_R32G32B32A32_UINT: return "u"; - - case VULKAN_FORMAT_R8_SINT: - case VULKAN_FORMAT_R8G8_SINT: - case VULKAN_FORMAT_R8G8B8A8_SINT: - case VULKAN_FORMAT_R16_SINT: - case VULKAN_FORMAT_R16G16_SINT: - case VULKAN_FORMAT_R16G16B16A16_SINT: - case VULKAN_FORMAT_R32_SINT: - case VULKAN_FORMAT_R32G32_SINT: - case VULKAN_FORMAT_R32G32B32A32_SINT: return "i"; - - case VULKAN_FORMAT_R32_SFLOAT: - case VULKAN_FORMAT_R32G32_SFLOAT: - case VULKAN_FORMAT_R32G32B32A32_SFLOAT: return ""; - - default: ASSERT(0); std::cout << "Unknown format"; - } - - return ""; -} - -std::string prepareVulkanShader( - std::string shaderCode, - const std::map &patternToSubstituteMap) -{ - for (std::map::const_iterator psIt = - patternToSubstituteMap.begin(); - psIt != patternToSubstituteMap.end(); ++psIt) - { - std::string::size_type pos = 0u; - while ((pos = shaderCode.find(psIt->first, pos)) != std::string::npos) - { - shaderCode.replace(pos, psIt->first.length(), psIt->second); - pos += psIt->second.length(); - } - } - - return shaderCode; -} - std::ostream &operator<<(std::ostream &os, VulkanMemoryTypeProperty memoryTypeProperty) { @@ -691,3 +639,54 @@ std::ostream &operator<<(std::ostream &os, VulkanFormat format) return os; } + +static char *findFilePath(const std::string 
filename) +{ + const char *searchPath[] = { + "./", // Same dir + "./shaders/", // In shaders folder in same dir + "../test_conformance/vulkan/shaders/" // In src folder + }; + for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) + { + std::string path(searchPath[i]); + + path.append(filename); + FILE *fp; + fp = fopen(path.c_str(), "rb"); + + if (fp != NULL) + { + fclose(fp); + // File found + char *file_path = (char *)(malloc(path.length() + 1)); + strncpy(file_path, path.c_str(), path.length() + 1); + return file_path; + } + if (fp) + { + fclose(fp); + } + } + // File not found + return 0; +} + +std::vector readFile(const std::string &filename) +{ + char *file_path = findFilePath(filename); + + std::ifstream file(file_path, std::ios::ate | std::ios::binary); + + if (!file.is_open()) + { + throw std::runtime_error("failed to open shader spv file!\n"); + } + size_t fileSize = (size_t)file.tellg(); + std::vector buffer(fileSize); + file.seekg(0); + file.read(buffer.data(), fileSize); + file.close(); + printf("filesize is %d", fileSize); + return buffer; +} diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp index 7022fd5a..04f5a594 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.hpp @@ -66,4 +66,5 @@ operator<<(std::ostream& os, VulkanExternalSemaphoreHandleType externalSemaphoreHandleType); std::ostream& operator<<(std::ostream& os, VulkanFormat format); +std::vector readFile(const std::string& filename); #endif // _vulkan_utility_hpp_ diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp index c044e009..e5d3a271 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp @@ -201,7 
+201,8 @@ VulkanInstance::VulkanInstance(): m_vkInstance(VK_NULL_HANDLE) if (physicalDeviceCount == uint32_t(0)) { - throw std::runtime_error("failed to find GPUs with Vulkan support!"); + std::cout << "failed to find GPUs with Vulkan support!\n"; + return; } std::vector vkPhysicalDeviceList(physicalDeviceCount, @@ -846,23 +847,18 @@ VulkanShaderModule::VulkanShaderModule(const VulkanShaderModule &shaderModule) {} VulkanShaderModule::VulkanShaderModule(const VulkanDevice &device, - const std::string &code) + const std::vector &code) : m_device(device) { - std::string paddedCode = code; - while (paddedCode.size() % 4) - { - paddedCode += " "; - } VkShaderModuleCreateInfo vkShaderModuleCreateInfo = {}; vkShaderModuleCreateInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; vkShaderModuleCreateInfo.pNext = NULL; vkShaderModuleCreateInfo.flags = 0; - vkShaderModuleCreateInfo.codeSize = paddedCode.size(); + vkShaderModuleCreateInfo.codeSize = code.size(); vkShaderModuleCreateInfo.pCode = - (const uint32_t *)(void *)paddedCode.c_str(); + reinterpret_cast(code.data()); vkCreateShaderModule(m_device, &vkShaderModuleCreateInfo, NULL, &m_vkShaderModule); diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp index 1f68a92b..37925ee4 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.hpp @@ -240,7 +240,8 @@ protected: VulkanShaderModule(const VulkanShaderModule &shaderModule); public: - VulkanShaderModule(const VulkanDevice &device, const std::string &code); + VulkanShaderModule(const VulkanDevice &device, + const std::vector &code); virtual ~VulkanShaderModule(); operator VkShaderModule() const; }; -- cgit v1.2.3 From 6659a1b6b8a4a989fe5d28ebd012c15f4e6872cf Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Tue, 4 Oct 2022 09:02:25 -0700 Subject: remove implicit conversion to pointer to 
fix 32-bit compile (#1488) * remove implicit conversion to pointer to fix 32-bit compile * fix formatting --- .../vulkan/vulkan_interop_common/vulkan_list_map.hpp | 4 ++-- .../vulkan/vulkan_interop_common/vulkan_utility.cpp | 2 +- .../vulkan/vulkan_interop_common/vulkan_wrapper.cpp | 13 +++++++------ 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp index 10a7b221..52206779 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_list_map.hpp @@ -37,7 +37,7 @@ public: virtual size_t size() const; virtual const VulkanWrapper &operator[](size_t idx) const; virtual VulkanWrapper &operator[](size_t idx); - virtual operator const VulkanNative *() const; + virtual const VulkanNative *operator()() const; }; template class VulkanMap { @@ -340,7 +340,7 @@ VulkanWrapper &VulkanList::operator[](size_t idx) } template -VulkanList::operator const VulkanNative *() const +const VulkanNative *VulkanList::operator()() const { return m_nativeList.data(); } diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp index 4e6118b1..1a313cce 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_utility.cpp @@ -183,7 +183,7 @@ bool checkVkSupport() const VulkanInstance &instance = getVulkanInstance(); const VulkanPhysicalDeviceList &physicalDeviceList = instance.getPhysicalDeviceList(); - if (physicalDeviceList == NULL) + if (physicalDeviceList() == NULL) { std::cout << "physicalDeviceList is null, No GPUs found with " "Vulkan support !!!\n"; diff --git a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp index 
e5d3a271..6209a747 100644 --- a/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp +++ b/test_conformance/vulkan/vulkan_interop_common/vulkan_wrapper.cpp @@ -626,12 +626,12 @@ void VulkanQueue::submit(const VulkanSemaphoreList &waitSemaphoreList, vkSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; vkSubmitInfo.pNext = NULL; vkSubmitInfo.waitSemaphoreCount = (uint32_t)waitSemaphoreList.size(); - vkSubmitInfo.pWaitSemaphores = waitSemaphoreList; + vkSubmitInfo.pWaitSemaphores = waitSemaphoreList(); vkSubmitInfo.pWaitDstStageMask = vkPipelineStageFlagsList.data(); vkSubmitInfo.commandBufferCount = (uint32_t)commandBufferList.size(); - vkSubmitInfo.pCommandBuffers = commandBufferList; + vkSubmitInfo.pCommandBuffers = commandBufferList(); vkSubmitInfo.signalSemaphoreCount = (uint32_t)signalSemaphoreList.size(); - vkSubmitInfo.pSignalSemaphores = signalSemaphoreList; + vkSubmitInfo.pSignalSemaphores = signalSemaphoreList(); vkQueueSubmit(m_vkQueue, 1, &vkSubmitInfo, NULL); } @@ -729,7 +729,8 @@ void VulkanDescriptorSetLayout::VulkanDescriptorSetLayoutCommon( vkDescriptorSetLayoutCreateInfo.flags = 0; vkDescriptorSetLayoutCreateInfo.bindingCount = (uint32_t)descriptorSetLayoutBindingList.size(); - vkDescriptorSetLayoutCreateInfo.pBindings = descriptorSetLayoutBindingList; + vkDescriptorSetLayoutCreateInfo.pBindings = + descriptorSetLayoutBindingList(); vkCreateDescriptorSetLayout(m_device, &vkDescriptorSetLayoutCreateInfo, NULL, &m_vkDescriptorSetLayout); @@ -800,7 +801,7 @@ void VulkanPipelineLayout::VulkanPipelineLayoutCommon( vkPipelineLayoutCreateInfo.flags = 0; vkPipelineLayoutCreateInfo.setLayoutCount = (uint32_t)descriptorSetLayoutList.size(); - vkPipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayoutList; + vkPipelineLayoutCreateInfo.pSetLayouts = descriptorSetLayoutList(); vkPipelineLayoutCreateInfo.pushConstantRangeCount = 0; vkPipelineLayoutCreateInfo.pPushConstantRanges = NULL; @@ -1573,7 +1574,7 @@ VulkanImage::VulkanImage( 
vkImageCreateInfo.queueFamilyIndexCount = (uint32_t)m_device.getPhysicalDevice().getQueueFamilyList().size(); vkImageCreateInfo.pQueueFamilyIndices = - m_device.getPhysicalDevice().getQueueFamilyList(); + m_device.getPhysicalDevice().getQueueFamilyList()(); vkImageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; VkExternalMemoryImageCreateInfo vkExternalMemoryImageCreateInfo = {}; -- cgit v1.2.3 From 07b055cd68072bf53151f2f059ba89c8e876c0d7 Mon Sep 17 00:00:00 2001 From: Nikhil Joshi Date: Tue, 4 Oct 2022 21:43:18 +0530 Subject: Cap CL_DEVICE_MAX_MEM_ALLOC_SIZE to SIZE_MAX (#1501) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix enqueue_flags test to use correct barrier type. Currently, enqueue_flags test uses CLK_LOCAL_MEM_FENCE. Use CLK_GLOBAL_MEM_FENCE instead as all threads across work-groups need to wait here. * Add check for support for Read-Wrie images Read-Write images have required OpenCL 2.x. Read-Write image tests are already being skipped for 1.x devices. With OpenCL 3.0, read-write images being optional, the tests should be run or skipped depending on the implementation support. Add a check to decide if Read-Write images are supported or required to be supported depending on OpenCL version and decide if the tests should be run on skipped. Fixes issue #894 * Fix formatting in case of Read-Write image checks. Fix formatting in case of Read-write image checks. Also, combine two ifs into one in case of kerne_read_write tests * Fix some more formatting for RW-image checks Remove unnecessary spaces at various places. Also, fix lengthy lines. * Fix malloc-size calculation in test imagedim unsigned char size is silently assumed to be 1 in imagedim test of test_basic. Pass sizeof(type) in malloc size calculation. Also, change loop variable from signed to unsigned. Add checks for null pointer for malloced memory. 
* Cap CL_DEVICE_MAX_MEM_ALLOC_SIZE to SIZE_MAX Cap CL_DEVICE_MAX_MEM_ALLOC_SIZE to SIZE_MAX when CL_DEVICE_GLOBAL_MEM_SIZE is capped with SIZE_MAX. test_allocation caps the value of GLOBAL_MEM_SIZE to SIZE_MAX if it exceeds the value of SIZE_MAX(value depends on platform bitness), but doesn’t modify MAX_ALLOC_SIZE the same way. Due to this MAX_ALLOC_SIZE becomes greater than GLOBAL_MEM_SIZE and the test fails. Modify MAX_MEM_ALLOC_SIZE as GLOBAL_MEM_SIZE when it exceeds SIZE_MAX OpenCL-CTS #1022 --- test_conformance/images/clCopyImage/test_copy_1D.cpp | 1 + test_conformance/images/clCopyImage/test_copy_1D_array.cpp | 1 + test_conformance/images/clCopyImage/test_copy_2D.cpp | 1 + test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp | 1 + test_conformance/images/clCopyImage/test_copy_2D_3D.cpp | 1 + test_conformance/images/clCopyImage/test_copy_2D_array.cpp | 1 + test_conformance/images/clCopyImage/test_copy_3D.cpp | 1 + test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp | 1 + test_conformance/images/clFillImage/test_fill_1D.cpp | 1 + test_conformance/images/clFillImage/test_fill_1D_array.cpp | 1 + test_conformance/images/clFillImage/test_fill_2D.cpp | 1 + test_conformance/images/clFillImage/test_fill_2D_array.cpp | 1 + test_conformance/images/clFillImage/test_fill_3D.cpp | 1 + test_conformance/images/clGetInfo/test_1D.cpp | 1 + test_conformance/images/clGetInfo/test_1D_2D_array.cpp | 2 ++ test_conformance/images/clGetInfo/test_2D.cpp | 1 + test_conformance/images/clGetInfo/test_3D.cpp | 1 + test_conformance/images/clReadWriteImage/test_read_1D.cpp | 1 + test_conformance/images/clReadWriteImage/test_read_1D_array.cpp | 1 + test_conformance/images/clReadWriteImage/test_read_2D.cpp | 1 + test_conformance/images/clReadWriteImage/test_read_2D_array.cpp | 1 + test_conformance/images/clReadWriteImage/test_read_3D.cpp | 1 + test_conformance/images/kernel_image_methods/test_1D.cpp | 1 + test_conformance/images/kernel_image_methods/test_1D_array.cpp | 1 + 
test_conformance/images/kernel_image_methods/test_2D.cpp | 1 + test_conformance/images/samplerlessReads/test_iterations.cpp | 1 + test_conformance/images/samplerlessReads/test_read_1D.cpp | 1 + test_conformance/images/samplerlessReads/test_read_1D_array.cpp | 1 + test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp | 1 + test_conformance/images/samplerlessReads/test_read_2D_array.cpp | 1 + test_conformance/images/samplerlessReads/test_read_3D.cpp | 1 + 31 files changed, 32 insertions(+) diff --git a/test_conformance/images/clCopyImage/test_copy_1D.cpp b/test_conformance/images/clCopyImage/test_copy_1D.cpp index 2c996c72..0f6f3ce4 100644 --- a/test_conformance/images/clCopyImage/test_copy_1D.cpp +++ b/test_conformance/images/clCopyImage/test_copy_1D.cpp @@ -113,6 +113,7 @@ int test_copy_image_set_1D( cl_device_id device, cl_context context, cl_command_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp index 0b616934..f0b610bb 100644 --- a/test_conformance/images/clCopyImage/test_copy_1D_array.cpp +++ b/test_conformance/images/clCopyImage/test_copy_1D_array.cpp @@ -118,6 +118,7 @@ int test_copy_image_set_1D_array( cl_device_id device, cl_context context, cl_co if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_2D.cpp b/test_conformance/images/clCopyImage/test_copy_2D.cpp index 1a69a1fe..448b47f0 100644 --- a/test_conformance/images/clCopyImage/test_copy_2D.cpp +++ b/test_conformance/images/clCopyImage/test_copy_2D.cpp @@ -125,6 +125,7 @@ int test_copy_image_set_2D( cl_device_id device, cl_context context, cl_command_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = 
(cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp index eb6dd552..1819d87c 100644 --- a/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp +++ b/test_conformance/images/clCopyImage/test_copy_2D_2D_array.cpp @@ -224,6 +224,7 @@ int test_copy_image_set_2D_2D_array( cl_device_id device, cl_context context, cl if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp index 8a56c95f..4ab6b42a 100644 --- a/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp +++ b/test_conformance/images/clCopyImage/test_copy_2D_3D.cpp @@ -230,6 +230,7 @@ int test_copy_image_set_2D_3D( cl_device_id device, cl_context context, cl_comma if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp index 6327ba58..3376bf9a 100644 --- a/test_conformance/images/clCopyImage/test_copy_2D_array.cpp +++ b/test_conformance/images/clCopyImage/test_copy_2D_array.cpp @@ -71,6 +71,7 @@ int test_copy_image_set_2D_array( cl_device_id device, cl_context context, cl_co if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_3D.cpp b/test_conformance/images/clCopyImage/test_copy_3D.cpp index da6731d7..cdfdccec 100644 --- a/test_conformance/images/clCopyImage/test_copy_3D.cpp +++ b/test_conformance/images/clCopyImage/test_copy_3D.cpp @@ -57,6 +57,7 @@ int test_copy_image_set_3D( cl_device_id device, cl_context context, cl_command_ if (memSize > 
(cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp index c098f645..1da1e477 100644 --- a/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp +++ b/test_conformance/images/clCopyImage/test_copy_3D_2D_array.cpp @@ -251,6 +251,7 @@ int test_copy_image_set_3D_2D_array(cl_device_id device, cl_context context, cl_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clFillImage/test_fill_1D.cpp b/test_conformance/images/clFillImage/test_fill_1D.cpp index c3f23185..b1550bf3 100644 --- a/test_conformance/images/clFillImage/test_fill_1D.cpp +++ b/test_conformance/images/clFillImage/test_fill_1D.cpp @@ -80,6 +80,7 @@ int test_fill_image_set_1D( cl_device_id device, cl_context context, cl_command_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if ( gTestSmallImages ) diff --git a/test_conformance/images/clFillImage/test_fill_1D_array.cpp b/test_conformance/images/clFillImage/test_fill_1D_array.cpp index b4347a47..be32ec6a 100644 --- a/test_conformance/images/clFillImage/test_fill_1D_array.cpp +++ b/test_conformance/images/clFillImage/test_fill_1D_array.cpp @@ -83,6 +83,7 @@ int test_fill_image_set_1D_array( cl_device_id device, cl_context context, cl_co if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if ( gTestSmallImages ) diff --git a/test_conformance/images/clFillImage/test_fill_2D.cpp b/test_conformance/images/clFillImage/test_fill_2D.cpp index bb66fc27..e941abcf 100644 --- a/test_conformance/images/clFillImage/test_fill_2D.cpp +++ b/test_conformance/images/clFillImage/test_fill_2D.cpp @@ -83,6 +83,7 @@ int test_fill_image_set_2D( 
cl_device_id device, cl_context context, cl_command_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if ( gTestSmallImages ) diff --git a/test_conformance/images/clFillImage/test_fill_2D_array.cpp b/test_conformance/images/clFillImage/test_fill_2D_array.cpp index 3265aab0..38196cfc 100644 --- a/test_conformance/images/clFillImage/test_fill_2D_array.cpp +++ b/test_conformance/images/clFillImage/test_fill_2D_array.cpp @@ -87,6 +87,7 @@ int test_fill_image_set_2D_array( cl_device_id device, cl_context context, cl_co if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if ( gTestSmallImages ) diff --git a/test_conformance/images/clFillImage/test_fill_3D.cpp b/test_conformance/images/clFillImage/test_fill_3D.cpp index 9db0ac7c..0b8e4e58 100644 --- a/test_conformance/images/clFillImage/test_fill_3D.cpp +++ b/test_conformance/images/clFillImage/test_fill_3D.cpp @@ -87,6 +87,7 @@ int test_fill_image_set_3D( cl_device_id device, cl_context context, cl_command_ if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if ( gTestSmallImages ) diff --git a/test_conformance/images/clGetInfo/test_1D.cpp b/test_conformance/images/clGetInfo/test_1D.cpp index 0d704b82..7e044856 100644 --- a/test_conformance/images/clGetInfo/test_1D.cpp +++ b/test_conformance/images/clGetInfo/test_1D.cpp @@ -46,6 +46,7 @@ int test_get_image_info_1D( cl_device_id device, cl_context context, cl_image_fo if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp index 447fc7c2..c35bf22b 100644 --- a/test_conformance/images/clGetInfo/test_1D_2D_array.cpp +++ b/test_conformance/images/clGetInfo/test_1D_2D_array.cpp @@ -44,6 +44,7 @@ int 
test_get_image_info_1D_array( cl_device_id device, cl_context context, cl_im if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) @@ -168,6 +169,7 @@ int test_get_image_info_2D_array( cl_device_id device, cl_context context, cl_im if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clGetInfo/test_2D.cpp b/test_conformance/images/clGetInfo/test_2D.cpp index 74a60123..764b186d 100644 --- a/test_conformance/images/clGetInfo/test_2D.cpp +++ b/test_conformance/images/clGetInfo/test_2D.cpp @@ -285,6 +285,7 @@ int test_get_image_info_2D( cl_device_id device, cl_context context, cl_image_fo if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clGetInfo/test_3D.cpp b/test_conformance/images/clGetInfo/test_3D.cpp index af5062e3..e1261863 100644 --- a/test_conformance/images/clGetInfo/test_3D.cpp +++ b/test_conformance/images/clGetInfo/test_3D.cpp @@ -47,6 +47,7 @@ int test_get_image_info_3D( cl_device_id device, cl_context context, cl_image_fo if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clReadWriteImage/test_read_1D.cpp b/test_conformance/images/clReadWriteImage/test_read_1D.cpp index 42933c0f..2d94dc82 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D.cpp @@ -187,6 +187,7 @@ int test_read_image_set_1D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp 
b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp index efd2a795..cc902042 100644 --- a/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_1D_array.cpp @@ -191,6 +191,7 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clReadWriteImage/test_read_2D.cpp b/test_conformance/images/clReadWriteImage/test_read_2D.cpp index b7f8553b..b6102874 100644 --- a/test_conformance/images/clReadWriteImage/test_read_2D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_2D.cpp @@ -194,6 +194,7 @@ int test_read_image_set_2D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp index 5889ad6a..401b0e4d 100644 --- a/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_2D_array.cpp @@ -169,6 +169,7 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/clReadWriteImage/test_read_3D.cpp b/test_conformance/images/clReadWriteImage/test_read_3D.cpp index 6f73f423..ced04abf 100644 --- a/test_conformance/images/clReadWriteImage/test_read_3D.cpp +++ b/test_conformance/images/clReadWriteImage/test_read_3D.cpp @@ -174,6 +174,7 @@ int test_read_image_set_3D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( 
gTestSmallImages ) diff --git a/test_conformance/images/kernel_image_methods/test_1D.cpp b/test_conformance/images/kernel_image_methods/test_1D.cpp index 0059d4c2..934e78ba 100644 --- a/test_conformance/images/kernel_image_methods/test_1D.cpp +++ b/test_conformance/images/kernel_image_methods/test_1D.cpp @@ -171,6 +171,7 @@ int test_get_image_info_1D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/kernel_image_methods/test_1D_array.cpp b/test_conformance/images/kernel_image_methods/test_1D_array.cpp index 797161c4..a824f088 100644 --- a/test_conformance/images/kernel_image_methods/test_1D_array.cpp +++ b/test_conformance/images/kernel_image_methods/test_1D_array.cpp @@ -181,6 +181,7 @@ int test_get_image_info_1D_array(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/kernel_image_methods/test_2D.cpp b/test_conformance/images/kernel_image_methods/test_2D.cpp index b0d4a708..07f8d929 100644 --- a/test_conformance/images/kernel_image_methods/test_2D.cpp +++ b/test_conformance/images/kernel_image_methods/test_2D.cpp @@ -232,6 +232,7 @@ int test_get_image_info_2D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } if( gTestSmallImages ) diff --git a/test_conformance/images/samplerlessReads/test_iterations.cpp b/test_conformance/images/samplerlessReads/test_iterations.cpp index 55eaaf48..e2f89aad 100644 --- a/test_conformance/images/samplerlessReads/test_iterations.cpp +++ b/test_conformance/images/samplerlessReads/test_iterations.cpp @@ -215,6 +215,7 @@ int test_read_image_set_2D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = 
(cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // Determine types diff --git a/test_conformance/images/samplerlessReads/test_read_1D.cpp b/test_conformance/images/samplerlessReads/test_read_1D.cpp index aa261b7e..6ed9910a 100644 --- a/test_conformance/images/samplerlessReads/test_read_1D.cpp +++ b/test_conformance/images/samplerlessReads/test_read_1D.cpp @@ -215,6 +215,7 @@ int test_read_image_set_1D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // Determine types diff --git a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp index fb0c2632..677eb9f1 100644 --- a/test_conformance/images/samplerlessReads/test_read_1D_array.cpp +++ b/test_conformance/images/samplerlessReads/test_read_1D_array.cpp @@ -214,6 +214,7 @@ int test_read_image_set_1D_array(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // Determine types diff --git a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp index 7a3084d3..c3a991a7 100644 --- a/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp +++ b/test_conformance/images/samplerlessReads/test_read_1D_buffer.cpp @@ -219,6 +219,7 @@ int test_read_image_set_1D_buffer(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // note: image_buffer test uses image1D for results validation. 
diff --git a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp index 99f24266..8273f538 100644 --- a/test_conformance/images/samplerlessReads/test_read_2D_array.cpp +++ b/test_conformance/images/samplerlessReads/test_read_2D_array.cpp @@ -202,6 +202,7 @@ int test_read_image_set_2D_array(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // Determine types diff --git a/test_conformance/images/samplerlessReads/test_read_3D.cpp b/test_conformance/images/samplerlessReads/test_read_3D.cpp index cf411407..0df46c86 100644 --- a/test_conformance/images/samplerlessReads/test_read_3D.cpp +++ b/test_conformance/images/samplerlessReads/test_read_3D.cpp @@ -206,6 +206,7 @@ int test_read_image_set_3D(cl_device_id device, cl_context context, if (memSize > (cl_ulong)SIZE_MAX) { memSize = (cl_ulong)SIZE_MAX; + maxAllocSize = (cl_ulong)SIZE_MAX; } // Determine types -- cgit v1.2.3 From d9a938b698985ec2377786299dd96db189d7ca41 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 4 Oct 2022 17:28:29 +0100 Subject: Factor out GetTernaryKernel (#1511) Use a common function to create the kernel source code for testing 3-argument math builtins. This reduces code duplication. 1-argument and 2-argument math kernel construction will be factored out in future work. Change the kernels to use preprocessor defines for argument types and undef values, to make the CTS code easier to read. 
Co-authored-by: Marco Antognini Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt Signed-off-by: Marco Antognini Signed-off-by: Sven van Haastregt Co-authored-by: Marco Antognini --- test_conformance/math_brute_force/CMakeLists.txt | 1 + test_conformance/math_brute_force/common.cpp | 170 +++++++++++++++++++++ test_conformance/math_brute_force/common.h | 17 +++ test_conformance/math_brute_force/mad_double.cpp | 95 +----------- test_conformance/math_brute_force/mad_float.cpp | 93 +---------- .../math_brute_force/ternary_double.cpp | 95 +----------- .../math_brute_force/ternary_float.cpp | 93 +---------- 7 files changed, 216 insertions(+), 348 deletions(-) create mode 100644 test_conformance/math_brute_force/common.cpp diff --git a/test_conformance/math_brute_force/CMakeLists.txt b/test_conformance/math_brute_force/CMakeLists.txt index 1c96f521..32814026 100644 --- a/test_conformance/math_brute_force/CMakeLists.txt +++ b/test_conformance/math_brute_force/CMakeLists.txt @@ -9,6 +9,7 @@ set(${MODULE_NAME}_SOURCES binary_operator_float.cpp binary_two_results_i_double.cpp binary_two_results_i_float.cpp + common.cpp common.h function_list.cpp function_list.h diff --git a/test_conformance/math_brute_force/common.cpp b/test_conformance/math_brute_force/common.cpp new file mode 100644 index 00000000..f5e9f993 --- /dev/null +++ b/test_conformance/math_brute_force/common.cpp @@ -0,0 +1,170 @@ +// +// Copyright (c) 2022 The Khronos Group Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "common.h" + +#include "utility.h" // for sizeNames and sizeValues. + +#include +#include + +namespace { + +const char *GetTypeName(ParameterType type) +{ + switch (type) + { + case ParameterType::Float: return "float"; + case ParameterType::Double: return "double"; + } + return nullptr; +} + +const char *GetUndefValue(ParameterType type) +{ + switch (type) + { + case ParameterType::Float: + case ParameterType::Double: return "NAN"; + } + return nullptr; +} + +void EmitDefineType(std::ostringstream &kernel, const char *name, + ParameterType type, int vector_size_index) +{ + kernel << "#define " << name << " " << GetTypeName(type) + << sizeNames[vector_size_index] << '\n'; + kernel << "#define " << name << "_SCALAR " << GetTypeName(type) << '\n'; +} + +void EmitDefineUndef(std::ostringstream &kernel, const char *name, + ParameterType type) +{ + kernel << "#define " << name << " " << GetUndefValue(type) << '\n'; +} + +void EmitEnableExtension(std::ostringstream &kernel, ParameterType type) +{ + switch (type) + { + case ParameterType::Double: + kernel << "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"; + break; + + case ParameterType::Float: + // No extension required. + break; + } +} + +} // anonymous namespace + +std::string GetKernelName(int vector_size_index) +{ + return std::string("math_kernel") + sizeNames[vector_size_index]; +} + +std::string GetTernaryKernel(const std::string &kernel_name, + const char *builtin, ParameterType retType, + ParameterType type1, ParameterType type2, + ParameterType type3, int vector_size_index) +{ + // To keep the kernel code readable, use macros for types and undef values. 
+ std::ostringstream kernel; + EmitDefineType(kernel, "RETTYPE", retType, vector_size_index); + EmitDefineType(kernel, "TYPE1", type1, vector_size_index); + EmitDefineType(kernel, "TYPE2", type2, vector_size_index); + EmitDefineType(kernel, "TYPE3", type3, vector_size_index); + EmitDefineUndef(kernel, "UNDEF1", type1); + EmitDefineUndef(kernel, "UNDEF2", type2); + EmitDefineUndef(kernel, "UNDEF3", type3); + EmitEnableExtension(kernel, type1); + + // clang-format off + const char *kernel_nonvec3[] = { R"( +__kernel void )", kernel_name.c_str(), R"((__global RETTYPE* out, + __global TYPE1* in1, + __global TYPE2* in2, + __global TYPE3* in3) +{ + size_t i = get_global_id(0); + out[i] = )", builtin, R"((in1[i], in2[i], in3[i]); +} +)" }; + + const char *kernel_vec3[] = { R"( +__kernel void )", kernel_name.c_str(), R"((__global RETTYPE_SCALAR* out, + __global TYPE1_SCALAR* in1, + __global TYPE2_SCALAR* in2, + __global TYPE3_SCALAR* in3) +{ + size_t i = get_global_id(0); + + if (i + 1 < get_global_size(0)) + { + TYPE1 a = vload3(0, in1 + 3 * i); + TYPE2 b = vload3(0, in2 + 3 * i); + TYPE3 c = vload3(0, in3 + 3 * i); + RETTYPE res = )", builtin, R"((a, b, c); + vstore3(res, 0, out + 3 * i); + } + else + { + // Figure out how many elements are left over after + // BUFFER_SIZE % (3 * sizeof(type)). + // Assume power of two buffer size. 
+ size_t parity = i & 1; + TYPE1 a = (TYPE1)(UNDEF1, UNDEF1, UNDEF1); + TYPE2 b = (TYPE2)(UNDEF2, UNDEF2, UNDEF2); + TYPE3 c = (TYPE3)(UNDEF3, UNDEF3, UNDEF3); + switch (parity) + { + case 0: + a.y = in1[3 * i + 1]; + b.y = in2[3 * i + 1]; + c.y = in3[3 * i + 1]; + // fall through + case 1: + a.x = in1[3 * i]; + b.x = in2[3 * i]; + c.x = in3[3 * i]; + break; + } + + RETTYPE res = )", builtin, R"((a, b, c); + + switch (parity) + { + case 0: + out[3 * i + 1] = res.y; + // fall through + case 1: + out[3 * i] = res.x; + break; + } + } +} +)" }; + // clang-format on + + if (sizeValues[vector_size_index] != 3) + for (const auto &chunk : kernel_nonvec3) kernel << chunk; + else + for (const auto &chunk : kernel_vec3) kernel << chunk; + + return kernel.str(); +} diff --git a/test_conformance/math_brute_force/common.h b/test_conformance/math_brute_force/common.h index 6f17898f..143814ca 100644 --- a/test_conformance/math_brute_force/common.h +++ b/test_conformance/math_brute_force/common.h @@ -20,6 +20,7 @@ #include "utility.h" #include +#include #include // Array of thread-specific kernels for each vector size. @@ -31,6 +32,22 @@ using Programs = std::array; // Array of buffers for each vector size. using Buffers = std::array; +// Types supported for kernel code generation. +enum class ParameterType +{ + Float, + Double, +}; + +// Return kernel name suffixed with vector size. +std::string GetKernelName(int vector_size_index); + +// Generate kernel code for the given builtin function/operator. +std::string GetTernaryKernel(const std::string &kernel_name, + const char *builtin, ParameterType retType, + ParameterType type1, ParameterType type2, + ParameterType type3, int vector_size_index); + // Information to generate OpenCL kernels. 
struct BuildKernelInfo { diff --git a/test_conformance/math_brute_force/mad_double.cpp b/test_conformance/math_brute_force/mad_double.cpp index 3def6a80..8d8fec52 100644 --- a/test_conformance/math_brute_force/mad_double.cpp +++ b/test_conformance/math_brute_force/mad_double.cpp @@ -26,94 +26,13 @@ namespace { int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, bool relaxedMode) { - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global double", - sizeNames[vectorSize], - "* out, __global double", - sizeNames[vectorSize], - "* in1, __global double", - sizeNames[vectorSize], - "* in2, __global double", - sizeNames[vectorSize], - "* in3 )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = ", - name, - "( in1[i], in2[i], in3[i] );\n" - "}\n" }; - - const char *c3[] = { - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global double* out, __global double* in, __global double* in2, " - "__global double* in3)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " double3 d1 = vload3( 0, in2 + 3 * i );\n" - " double3 d2 = vload3( 0, in3 + 3 * i );\n" - " d0 = ", - name, - "( d0, d1, d2 );\n" - " vstore3( d0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are " - "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " - "buffer size \n" - " double3 d0;\n" - " double3 d1;\n" - " double3 d2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " d1 = (double3)( in2[3*i], NAN, NAN ); \n" - " d2 = (double3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" - " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " d0 = ", - name, - "( d0, d1, d2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = d0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = d0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c) / sizeof(c[0]); - - if (sizeValues[vectorSize] == 3) - { - kern = c3; - kernSize = sizeof(c3) / sizeof(c3[0]); - } - - char testName[32]; - snprintf(testName, sizeof(testName) - 1, "math_kernel%s", - sizeNames[vectorSize]); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); + auto kernel_name = GetKernelName(vectorSize); + auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double, + ParameterType::Double, ParameterType::Double, + ParameterType::Double, vectorSize); + std::array sources{ source.c_str() }; + return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p, + relaxedMode); } struct BuildKernelInfo2 diff --git a/test_conformance/math_brute_force/mad_float.cpp b/test_conformance/math_brute_force/mad_float.cpp index 498f25eb..04ac5aa6 100644 --- a/test_conformance/math_brute_force/mad_float.cpp +++ b/test_conformance/math_brute_force/mad_float.cpp @@ -26,92 +26,13 @@ namespace { int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, bool relaxedMode) { - const char *c[] = { "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global float", - sizeNames[vectorSize], - "* out, __global float", - 
sizeNames[vectorSize], - "* in1, __global float", - sizeNames[vectorSize], - "* in2, __global float", - sizeNames[vectorSize], - "* in3 )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = ", - name, - "( in1[i], in2[i], in3[i] );\n" - "}\n" }; - - const char *c3[] = { - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global float* out, __global float* in, __global float* in2, " - "__global float* in3)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " float3 f1 = vload3( 0, in2 + 3 * i );\n" - " float3 f2 = vload3( 0, in3 + 3 * i );\n" - " f0 = ", - name, - "( f0, f1, f2 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are " - "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " - "buffer size \n" - " float3 f0;\n" - " float3 f1;\n" - " float3 f2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " f1 = (float3)( in2[3*i], NAN, NAN ); \n" - " f2 = (float3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" - " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", - name, - "( f0, f1, f2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c) / sizeof(c[0]); - - if (sizeValues[vectorSize] == 3) - { - kern = c3; - kernSize = sizeof(c3) / sizeof(c3[0]); - } - - char testName[32]; - snprintf(testName, sizeof(testName) - 1, "math_kernel%s", - sizeNames[vectorSize]); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); + auto kernel_name = GetKernelName(vectorSize); + 
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float, + ParameterType::Float, ParameterType::Float, + ParameterType::Float, vectorSize); + std::array sources{ source.c_str() }; + return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p, + relaxedMode); } struct BuildKernelInfo2 diff --git a/test_conformance/math_brute_force/ternary_double.cpp b/test_conformance/math_brute_force/ternary_double.cpp index a7fa5625..b5f1ab09 100644 --- a/test_conformance/math_brute_force/ternary_double.cpp +++ b/test_conformance/math_brute_force/ternary_double.cpp @@ -30,94 +30,13 @@ namespace { int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, bool relaxedMode) { - const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global double", - sizeNames[vectorSize], - "* out, __global double", - sizeNames[vectorSize], - "* in1, __global double", - sizeNames[vectorSize], - "* in2, __global double", - sizeNames[vectorSize], - "* in3 )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = ", - name, - "( in1[i], in2[i], in3[i] );\n" - "}\n" }; - - const char *c3[] = { - "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global double* out, __global double* in, __global double* in2, " - "__global double* in3)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " double3 d0 = vload3( 0, in + 3 * i );\n" - " double3 d1 = vload3( 0, in2 + 3 * i );\n" - " double3 d2 = vload3( 0, in3 + 3 * i );\n" - " d0 = ", - name, - "( d0, d1, d2 );\n" - " vstore3( d0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are " - "left over after BUFFER_SIZE % (3*sizeof(float)). 
Assume power of two " - "buffer size \n" - " double3 d0;\n" - " double3 d1;\n" - " double3 d2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " d0 = (double3)( in[3*i], NAN, NAN ); \n" - " d1 = (double3)( in2[3*i], NAN, NAN ); \n" - " d2 = (double3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n" - " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n" - " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " d0 = ", - name, - "( d0, d1, d2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = d0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = d0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c) / sizeof(c[0]); - - if (sizeValues[vectorSize] == 3) - { - kern = c3; - kernSize = sizeof(c3) / sizeof(c3[0]); - } - - char testName[32]; - snprintf(testName, sizeof(testName) - 1, "math_kernel%s", - sizeNames[vectorSize]); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); + auto kernel_name = GetKernelName(vectorSize); + auto source = GetTernaryKernel(kernel_name, name, ParameterType::Double, + ParameterType::Double, ParameterType::Double, + ParameterType::Double, vectorSize); + std::array sources{ source.c_str() }; + return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p, + relaxedMode); } struct BuildKernelInfo2 diff --git a/test_conformance/math_brute_force/ternary_float.cpp b/test_conformance/math_brute_force/ternary_float.cpp index 3b8c2c3b..cf361841 100644 --- a/test_conformance/math_brute_force/ternary_float.cpp +++ b/test_conformance/math_brute_force/ternary_float.cpp @@ -30,92 +30,13 @@ namespace { int BuildKernel(const char *name, int vectorSize, cl_kernel *k, cl_program *p, bool relaxedMode) { - const char *c[] = { "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global float", - sizeNames[vectorSize], - "* out, __global float", - 
sizeNames[vectorSize], - "* in1, __global float", - sizeNames[vectorSize], - "* in2, __global float", - sizeNames[vectorSize], - "* in3 )\n" - "{\n" - " size_t i = get_global_id(0);\n" - " out[i] = ", - name, - "( in1[i], in2[i], in3[i] );\n" - "}\n" }; - - const char *c3[] = { - "__kernel void math_kernel", - sizeNames[vectorSize], - "( __global float* out, __global float* in, __global float* in2, " - "__global float* in3)\n" - "{\n" - " size_t i = get_global_id(0);\n" - " if( i + 1 < get_global_size(0) )\n" - " {\n" - " float3 f0 = vload3( 0, in + 3 * i );\n" - " float3 f1 = vload3( 0, in2 + 3 * i );\n" - " float3 f2 = vload3( 0, in3 + 3 * i );\n" - " f0 = ", - name, - "( f0, f1, f2 );\n" - " vstore3( f0, 0, out + 3*i );\n" - " }\n" - " else\n" - " {\n" - " size_t parity = i & 1; // Figure out how many elements are " - "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two " - "buffer size \n" - " float3 f0;\n" - " float3 f1;\n" - " float3 f2;\n" - " switch( parity )\n" - " {\n" - " case 1:\n" - " f0 = (float3)( in[3*i], NAN, NAN ); \n" - " f1 = (float3)( in2[3*i], NAN, NAN ); \n" - " f2 = (float3)( in3[3*i], NAN, NAN ); \n" - " break;\n" - " case 0:\n" - " f0 = (float3)( in[3*i], in[3*i+1], NAN ); \n" - " f1 = (float3)( in2[3*i], in2[3*i+1], NAN ); \n" - " f2 = (float3)( in3[3*i], in3[3*i+1], NAN ); \n" - " break;\n" - " }\n" - " f0 = ", - name, - "( f0, f1, f2 );\n" - " switch( parity )\n" - " {\n" - " case 0:\n" - " out[3*i+1] = f0.y; \n" - " // fall through\n" - " case 1:\n" - " out[3*i] = f0.x; \n" - " break;\n" - " }\n" - " }\n" - "}\n" - }; - - const char **kern = c; - size_t kernSize = sizeof(c) / sizeof(c[0]); - - if (sizeValues[vectorSize] == 3) - { - kern = c3; - kernSize = sizeof(c3) / sizeof(c3[0]); - } - - char testName[32]; - snprintf(testName, sizeof(testName) - 1, "math_kernel%s", - sizeNames[vectorSize]); - - return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode); + auto kernel_name = GetKernelName(vectorSize); + 
auto source = GetTernaryKernel(kernel_name, name, ParameterType::Float, + ParameterType::Float, ParameterType::Float, + ParameterType::Float, vectorSize); + std::array sources{ source.c_str() }; + return MakeKernel(sources.data(), sources.size(), kernel_name.c_str(), k, p, + relaxedMode); } struct BuildKernelInfo2 -- cgit v1.2.3 From d285ebe5beb484702601d540f85a7b32f3b68643 Mon Sep 17 00:00:00 2001 From: niranjanjoshi121 <43807392+niranjanjoshi121@users.noreply.github.com> Date: Tue, 11 Oct 2022 22:02:47 +0530 Subject: Fix memory oob problem in test conversions (#1513) * Fix memory oob problem in test conversions Allocate memory for argc arguments instead of argc - 1. * Fix formatting issue. --- test_conformance/conversions/test_conversions.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_conformance/conversions/test_conversions.cpp b/test_conformance/conversions/test_conversions.cpp index 765d09ff..2b18b925 100644 --- a/test_conformance/conversions/test_conversions.cpp +++ b/test_conformance/conversions/test_conversions.cpp @@ -343,7 +343,7 @@ int main (int argc, const char **argv ) static int ParseArgs( int argc, const char **argv ) { int i; - argList = (const char **)calloc( argc - 1, sizeof( char*) ); + argList = (const char **)calloc(argc, sizeof(char *)); argCount = 0; if( NULL == argList && argc > 1 ) -- cgit v1.2.3 From f6e37b17d2dc5152bb96f35a108e5e2a458c4237 Mon Sep 17 00:00:00 2001 From: Grzegorz Wawiorko Date: Tue, 11 Oct 2022 18:34:33 +0200 Subject: Fix image test image2d_from_buffer_positive (#1515) --- .../images/kernel_read_write/test_cl_ext_image_from_buffer.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp index 1b3b04b7..2ce33a17 100644 --- a/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp +++ 
b/test_conformance/images/kernel_read_write/test_cl_ext_image_from_buffer.cpp @@ -73,6 +73,12 @@ int image2d_from_buffer_positive(cl_device_id device, cl_context context, return TEST_SKIPPED_ITSELF; } + if (!is_extension_available(device, "cl_ext_image_requirements_info")) + { + printf("Extension cl_ext_image_requirements_info not available"); + return TEST_SKIPPED_ITSELF; + } + std::vector imageTypes{ CL_MEM_OBJECT_IMAGE1D, CL_MEM_OBJECT_IMAGE2D, CL_MEM_OBJECT_IMAGE3D, CL_MEM_OBJECT_IMAGE1D_BUFFER, -- cgit v1.2.3 From 9e0ce2ba80b0af7e64b013918c8b46dad51107dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A9vin=20Petit?= Date: Tue, 11 Oct 2022 17:35:36 +0100 Subject: Produce JSON results even when a suite's init function reports SKIP or FAIL (#1521) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also tidy-up some surrounding code. Signed-off-by: Kévin Petit Signed-off-by: Kévin Petit --- test_common/harness/testHarness.cpp | 164 ++++++++++++++++++++---------------- 1 file changed, 92 insertions(+), 72 deletions(-) diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index b3863918..d07d982c 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -60,6 +60,54 @@ bool gCoreILProgram = true; #define DEFAULT_NUM_ELEMENTS 0x4000 +static int saveResultsToJson(const char *suiteName, test_definition testList[], + unsigned char selectedTestList[], + test_status resultTestList[], int testNum) +{ + char *fileName = getenv("CL_CONFORMANCE_RESULTS_FILENAME"); + if (fileName == nullptr) + { + return EXIT_SUCCESS; + } + + FILE *file = fopen(fileName, "w"); + if (NULL == file) + { + log_error("ERROR: Failed to open '%s' for writing results.\n", + fileName); + return EXIT_FAILURE; + } + + const char *save_map[] = { "success", "failure" }; + const char *result_map[] = { "pass", "fail", "skip" }; + const char *linebreak[] = { "", ",\n" }; + int add_linebreak = 0; + + 
fprintf(file, "{\n"); + fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName); + fprintf(file, "\t\"results\": {\n"); + + for (int i = 0; i < testNum; ++i) + { + if (selectedTestList[i]) + { + fprintf(file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak], + testList[i].name, result_map[(int)resultTestList[i]]); + add_linebreak = 1; + } + } + fprintf(file, "\n"); + + fprintf(file, "\t}\n"); + fprintf(file, "}\n"); + + int ret = fclose(file) ? EXIT_FAILURE : EXIT_SUCCESS; + + log_info("Saving results to %s: %s!\n", fileName, save_map[ret]); + + return ret; +} + int runTestHarness(int argc, const char *argv[], int testNum, test_definition testList[], int forceNoContextCreation, cl_command_queue_properties queueProps) @@ -68,19 +116,28 @@ int runTestHarness(int argc, const char *argv[], int testNum, forceNoContextCreation, queueProps, NULL); } -int skip_init_info(int count) +int suite_did_not_pass_init(const char *suiteName, test_status status, + int testNum, test_definition testList[]) { - log_info("Test skipped while initialization\n"); - log_info("SKIPPED %d of %d tests.\n", count, count); - return EXIT_SUCCESS; -} + std::vector selectedTestList(testNum, 1); + std::vector resultTestList(testNum, status); -int fail_init_info(int count) -{ - log_info("Test failed while initialization\n"); - log_info("FAILED %d of %d tests.\n", count, count); - return EXIT_FAILURE; + int ret = saveResultsToJson(suiteName, testList, selectedTestList.data(), + resultTestList.data(), testNum); + + log_info("Test %s while initialization\n", + status == TEST_SKIP ? "skipped" : "failed"); + log_info("%s %d of %d tests.\n", status == TEST_SKIP ? "SKIPPED" : "FAILED", + testNum, testNum); + + if (ret != EXIT_SUCCESS) + { + return ret; + } + + return status == TEST_SKIP ? 
EXIT_SUCCESS : EXIT_FAILURE; } + void version_expected_info(const char *test_name, const char *api_name, const char *expected_version, const char *device_version) @@ -470,6 +527,7 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, log_error("Invalid device address bit size returned by device.\n"); return EXIT_FAILURE; } + const char *suiteName = argv[0]; if (gCompilationMode == kSpir_v) { test_status spirv_readiness = check_spirv_compilation_readiness(device); @@ -478,9 +536,15 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, switch (spirv_readiness) { case TEST_PASS: break; - case TEST_FAIL: return fail_init_info(testNum); - case TEST_SKIP: return skip_init_info(testNum); - case TEST_SKIPPED_ITSELF: return skip_init_info(testNum); + case TEST_FAIL: + return suite_did_not_pass_init(suiteName, TEST_FAIL, + testNum, testList); + case TEST_SKIP: + return suite_did_not_pass_init(suiteName, TEST_SKIP, + testNum, testList); + case TEST_SKIPPED_ITSELF: + return suite_did_not_pass_init(suiteName, TEST_SKIP, + testNum, testList); } } } @@ -492,9 +556,15 @@ int runTestHarnessWithCheck(int argc, const char *argv[], int testNum, switch (status) { case TEST_PASS: break; - case TEST_FAIL: return fail_init_info(testNum); - case TEST_SKIP: return skip_init_info(testNum); - case TEST_SKIPPED_ITSELF: return skip_init_info(testNum); + case TEST_FAIL: + return suite_did_not_pass_init(suiteName, TEST_FAIL, testNum, + testList); + case TEST_SKIP: + return suite_did_not_pass_init(suiteName, TEST_SKIP, testNum, + testList); + case TEST_SKIPPED_ITSELF: + return suite_did_not_pass_init(suiteName, TEST_SKIP, testNum, + testList); } } @@ -574,49 +644,6 @@ static int find_matching_tests(test_definition testList[], return EXIT_SUCCESS; } -static int saveResultsToJson(const char *fileName, const char *suiteName, - test_definition testList[], - unsigned char selectedTestList[], - test_status resultTestList[], int testNum) -{ - FILE *file = 
fopen(fileName, "w"); - if (NULL == file) - { - log_error("ERROR: Failed to open '%s' for writing results.\n", - fileName); - return EXIT_FAILURE; - } - - const char *save_map[] = { "success", "failure" }; - const char *result_map[] = { "pass", "fail", "skip" }; - const char *linebreak[] = { "", ",\n" }; - int add_linebreak = 0; - - fprintf(file, "{\n"); - fprintf(file, "\t\"cmd\": \"%s\",\n", suiteName); - fprintf(file, "\t\"results\": {\n"); - - for (int i = 0; i < testNum; ++i) - { - if (selectedTestList[i]) - { - fprintf(file, "%s\t\t\"%s\": \"%s\"", linebreak[add_linebreak], - testList[i].name, result_map[(int)resultTestList[i]]); - add_linebreak = 1; - } - } - fprintf(file, "\n"); - - fprintf(file, "\t}\n"); - fprintf(file, "}\n"); - - int ret = fclose(file) ? 1 : 0; - - log_info("Saving results to %s: %s!\n", fileName, save_map[ret]); - - return ret; -} - static void print_results(int failed, int count, const char *name) { if (count < failed) @@ -658,7 +685,6 @@ int parseAndCallCommandLineTests(int argc, const char *argv[], int ret = EXIT_SUCCESS; unsigned char *selectedTestList = (unsigned char *)calloc(testNum, 1); - test_status *resultTestList = NULL; if (argc == 1) { @@ -697,24 +723,19 @@ int parseAndCallCommandLineTests(int argc, const char *argv[], if (ret == EXIT_SUCCESS) { - resultTestList = - (test_status *)calloc(testNum, sizeof(*resultTestList)); + std::vector resultTestList(testNum, TEST_PASS); - callTestFunctions(testList, selectedTestList, resultTestList, testNum, - device, forceNoContextCreation, num_elements, + callTestFunctions(testList, selectedTestList, resultTestList.data(), + testNum, device, forceNoContextCreation, num_elements, queueProps); print_results(gFailCount, gTestCount, "sub-test"); print_results(gTestsFailed, gTestsFailed + gTestsPassed, "test"); - char *filename = getenv("CL_CONFORMANCE_RESULTS_FILENAME"); - if (filename != NULL) - { - ret = saveResultsToJson(filename, argv[0], testList, - selectedTestList, resultTestList, 
testNum); - } + ret = saveResultsToJson(argv[0], testList, selectedTestList, + resultTestList.data(), testNum); - if (std::any_of(resultTestList, resultTestList + testNum, + if (std::any_of(resultTestList.begin(), resultTestList.end(), [](test_status result) { switch (result) { @@ -730,7 +751,6 @@ int parseAndCallCommandLineTests(int argc, const char *argv[], } free(selectedTestList); - free(resultTestList); return ret; } -- cgit v1.2.3 From 35cab9c85bb12c78d46a7def998857c3d1e27633 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 11 Oct 2022 17:36:33 +0100 Subject: pipes: Fix readwrite verification function for fp64 (#1522) Use the appropriate function for verifying double precision values in the `pipe_readwrite_double` test. Change `verify_readwrite_double` to use `cl_long`, as `long long int` could be wider than 64 bits which would cause out-of-bound reads. This leaves no functional differences between `verify_readwrite_double` and `verify_readwrite_long`. Found by compiling with `-Wunused-function`, which flagged `verify_readwrite_double` as unused. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/pipes/test_pipe_read_write.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test_conformance/pipes/test_pipe_read_write.cpp b/test_conformance/pipes/test_pipe_read_write.cpp index cb72e96b..425c7aee 100644 --- a/test_conformance/pipes/test_pipe_read_write.cpp +++ b/test_conformance/pipes/test_pipe_read_write.cpp @@ -414,9 +414,9 @@ static int verify_readwrite_ulong(void *ptr1, void *ptr2, int n) static int verify_readwrite_double(void *ptr1, void *ptr2, int n) { int i; - long long int sum_input = 0, sum_output = 0; - long long int *inptr = (long long int *)ptr1; - long long int *outptr = (long long int *)ptr2; + cl_long sum_input = 0, sum_output = 0; + cl_long *inptr = (cl_long *)ptr1; + cl_long *outptr = (cl_long *)ptr2; for(i = 0; i < n; i++) { @@ -1246,7 +1246,7 @@ int test_pipe_readwrite_double( cl_device_id deviceID, cl_context context, cl_co size_t min_alignment = get_min_alignment(context); - foo = verify_readwrite_long; + foo = verify_readwrite_double; ptrSizes[0] = sizeof(cl_double); ptrSizes[1] = ptrSizes[0] << 1; -- cgit v1.2.3 From 4b39b59469444d9085db302ab0d2dd5b07a9f257 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Thu, 13 Oct 2022 10:01:53 +0100 Subject: [NFC] clang-format basic/test_progvar.cpp (#1528) Manually reformat the `prog_src` variable which contains kernel code and disable clang-format on it. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_conformance/basic/test_progvar.cpp | 1737 +++++++++++++++++++------------ 1 file changed, 1098 insertions(+), 639 deletions(-) diff --git a/test_conformance/basic/test_progvar.cpp b/test_conformance/basic/test_progvar.cpp index 9c872be5..e202d276 100644 --- a/test_conformance/basic/test_progvar.cpp +++ b/test_conformance/basic/test_progvar.cpp @@ -15,12 +15,13 @@ // #include "harness/compat.h" -// Bug: Missing in spec: atomic_intptr_t is always supported if device is 32-bits. +// Bug: Missing in spec: atomic_intptr_t is always supported if device is +// 32-bits. // Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE #define FLUSH fflush(stdout) -#define MAX_STR 16*1024 +#define MAX_STR 16 * 1024 #define ALIGNMENT 128 @@ -66,7 +67,11 @@ static int l_host_is_big_endian = 1; static size_t l_max_global_id0 = 0; static cl_bool l_linker_available = false; -#define check_error(errCode,msg,...) ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0) +#define check_error(errCode, msg, ...) \ + ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", \ + ##__VA_ARGS__, __FILE__, __LINE__), \ + 1) \ + : 0) //////////////////// // Info about types we can use for program scope variables. 
@@ -75,110 +80,135 @@ static cl_bool l_linker_available = false; class TypeInfo { public: - TypeInfo() : - name(""), - m_buf_elem_type(""), - m_is_vecbase(false), - m_is_atomic(false), - m_is_like_size_t(false), - m_is_bool(false), - m_elem_type(0), m_num_elem(0), - m_size(0), - m_value_size(0) - {} - TypeInfo(const char* name_arg) : - name(name_arg), - m_buf_elem_type(name_arg), - m_is_vecbase(false), - m_is_atomic(false), - m_is_like_size_t(false), - m_is_bool(false), - m_elem_type(0), m_num_elem(0), - m_size(0), - m_value_size(0) - { } + TypeInfo() + : name(""), m_buf_elem_type(""), m_is_vecbase(false), + m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false), + m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0) + {} + TypeInfo(const char* name_arg) + : name(name_arg), m_buf_elem_type(name_arg), m_is_vecbase(false), + m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false), + m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0) + {} // Vectors - TypeInfo( TypeInfo* elem_type, int num_elem ) : - m_is_vecbase(false), - m_is_atomic(false), - m_is_like_size_t(false), - m_is_bool(false), - m_elem_type(elem_type), - m_num_elem(num_elem) - { - char the_name[10]; // long enough for longest vector type name "double16" - snprintf(the_name,sizeof(the_name),"%s%d",elem_type->get_name_c_str(),m_num_elem); + TypeInfo(TypeInfo* elem_type, int num_elem) + : m_is_vecbase(false), m_is_atomic(false), m_is_like_size_t(false), + m_is_bool(false), m_elem_type(elem_type), m_num_elem(num_elem) + { + char + the_name[10]; // long enough for longest vector type name "double16" + snprintf(the_name, sizeof(the_name), "%s%d", + elem_type->get_name_c_str(), m_num_elem); this->name = std::string(the_name); this->m_buf_elem_type = std::string(the_name); this->m_value_size = num_elem * elem_type->get_size(); - if ( m_num_elem == 3 ) { + if (m_num_elem == 3) + { this->m_size = 4 * elem_type->get_size(); - } else { + } + else + { this->m_size = num_elem * 
elem_type->get_size(); } } const std::string& get_name(void) const { return name; } const char* get_name_c_str(void) const { return name.c_str(); } - TypeInfo& set_vecbase(void) { this->m_is_vecbase = true; return *this; } - TypeInfo& set_atomic(void) { this->m_is_atomic = true; return *this; } - TypeInfo& set_like_size_t(void) { + TypeInfo& set_vecbase(void) + { + this->m_is_vecbase = true; + return *this; + } + TypeInfo& set_atomic(void) + { + this->m_is_atomic = true; + return *this; + } + TypeInfo& set_like_size_t(void) + { this->m_is_like_size_t = true; - this->set_size( l_64bit_device ? 8 : 4 ); + this->set_size(l_64bit_device ? 8 : 4); this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint"; return *this; } - TypeInfo& set_bool(void) { this->m_is_bool = true; return *this; } - TypeInfo& set_size(size_t n) { this->m_value_size = this->m_size = n; return *this; } - TypeInfo& set_buf_elem_type( const char* name ) { this->m_buf_elem_type = std::string(name); return *this; } + TypeInfo& set_bool(void) + { + this->m_is_bool = true; + return *this; + } + TypeInfo& set_size(size_t n) + { + this->m_value_size = this->m_size = n; + return *this; + } + TypeInfo& set_buf_elem_type(const char* name) + { + this->m_buf_elem_type = std::string(name); + return *this; + } const TypeInfo* elem_type(void) const { return m_elem_type; } int num_elem(void) const { return m_num_elem; } - bool is_vecbase(void) const {return m_is_vecbase;} - bool is_atomic(void) const {return m_is_atomic;} - bool is_atomic_64bit(void) const {return m_is_atomic && m_size == 8;} - bool is_like_size_t(void) const {return m_is_like_size_t;} - bool is_bool(void) const {return m_is_bool;} - size_t get_size(void) const {return m_size;} - size_t get_value_size(void) const {return m_value_size;} + bool is_vecbase(void) const { return m_is_vecbase; } + bool is_atomic(void) const { return m_is_atomic; } + bool is_atomic_64bit(void) const { return m_is_atomic && m_size == 8; } + bool is_like_size_t(void) const 
{ return m_is_like_size_t; } + bool is_bool(void) const { return m_is_bool; } + size_t get_size(void) const { return m_size; } + size_t get_value_size(void) const { return m_value_size; } // When passing values of this type to a kernel, what buffer type // should be used? - const char* get_buf_elem_type(void) const { return m_buf_elem_type.c_str(); } + const char* get_buf_elem_type(void) const + { + return m_buf_elem_type.c_str(); + } - std::string as_string(const cl_uchar* value_ptr) const { + std::string as_string(const cl_uchar* value_ptr) const + { // This method would be shorter if I had a real handle to element // vector type. - if ( this->is_bool() ) { - std::string result( name ); + if (this->is_bool()) + { + std::string result(name); result += "<"; result += (*value_ptr ? "true" : "false"); result += ", "; char buf[10]; - sprintf(buf,"%02x",*value_ptr); + sprintf(buf, "%02x", *value_ptr); result += buf; result += ">"; return result; - } else if ( this->num_elem() ) { - std::string result( name ); + } + else if (this->num_elem()) + { + std::string result(name); result += "<"; - for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) { + for (unsigned ielem = 0; ielem < this->num_elem(); ielem++) + { char buf[MAX_STR]; - if ( ielem ) result += ", "; - for ( unsigned ibyte = 0; ibyte < this->m_elem_type->get_size() ; ibyte++ ) { - sprintf(buf + 2*ibyte,"%02x", value_ptr[ ielem * this->m_elem_type->get_size() + ibyte ] ); + if (ielem) result += ", "; + for (unsigned ibyte = 0; ibyte < this->m_elem_type->get_size(); + ibyte++) + { + sprintf(buf + 2 * ibyte, "%02x", + value_ptr[ielem * this->m_elem_type->get_size() + + ibyte]); } result += buf; } result += ">"; return result; - } else { - std::string result( name ); + } + else + { + std::string result(name); result += "<"; char buf[MAX_STR]; - for ( unsigned ibyte = 0; ibyte < this->get_size() ; ibyte++ ) { - sprintf(buf + 2*ibyte,"%02x", value_ptr[ ibyte ] ); + for (unsigned ibyte = 0; ibyte < 
this->get_size(); ibyte++) + { + sprintf(buf + 2 * ibyte, "%02x", value_ptr[ibyte]); } result += buf; result += ">"; @@ -189,51 +219,71 @@ public: // Initialize the given buffer to a constant value initialized as if it // were from the INIT_VAR macro below. // Only needs to support values 0 and 1. - void init( cl_uchar* buf, cl_uchar val) const { - if ( this->num_elem() ) { - for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) { + void init(cl_uchar* buf, cl_uchar val) const + { + if (this->num_elem()) + { + for (unsigned ielem = 0; ielem < this->num_elem(); ielem++) + { // Delegate! - this->init_elem( buf + ielem * this->get_value_size()/this->num_elem(), val ); + this->init_elem( + buf + ielem * this->get_value_size() / this->num_elem(), + val); } - } else { - init_elem( buf, val ); + } + else + { + init_elem(buf, val); } } private: - void init_elem( cl_uchar* buf, cl_uchar val ) const { - size_t elem_size = this->num_elem() ? this->get_value_size()/this->num_elem() : this->get_size(); - memset(buf,0,elem_size); - if ( val ) { - if ( strstr( name.c_str(), "float" ) ) { + void init_elem(cl_uchar* buf, cl_uchar val) const + { + size_t elem_size = this->num_elem() + ? this->get_value_size() / this->num_elem() + : this->get_size(); + memset(buf, 0, elem_size); + if (val) + { + if (strstr(name.c_str(), "float")) + { *(float*)buf = (float)val; return; } - if ( strstr( name.c_str(), "double" ) ) { + if (strstr(name.c_str(), "double")) + { *(double*)buf = (double)val; return; } - if ( this->is_bool() ) { *buf = (bool)val; return; } + if (this->is_bool()) + { + *buf = (bool)val; + return; + } // Write a single character value to the correct spot, // depending on host endianness. 
- if ( l_host_is_big_endian ) *(buf + elem_size-1) = (cl_uchar)val; - else *buf = (cl_uchar)val; + if (l_host_is_big_endian) + *(buf + elem_size - 1) = (cl_uchar)val; + else + *buf = (cl_uchar)val; } } -public: - void dump(FILE* fp) const { - fprintf(fp,"Type %s : <%d,%d,%s> ", name.c_str(), - (int)m_size, - (int)m_value_size, - m_buf_elem_type.c_str() ); - if ( this->m_elem_type ) fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), this->num_elem() ); - if ( this->m_is_vecbase ) fprintf(fp, " vecbase"); - if ( this->m_is_bool ) fprintf(fp, " bool"); - if ( this->m_is_like_size_t ) fprintf(fp, " like-size_t"); - if ( this->m_is_atomic ) fprintf(fp, " atomic"); - fprintf(fp,"\n"); +public: + void dump(FILE* fp) const + { + fprintf(fp, "Type %s : <%d,%d,%s> ", name.c_str(), (int)m_size, + (int)m_value_size, m_buf_elem_type.c_str()); + if (this->m_elem_type) + fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), + this->num_elem()); + if (this->m_is_vecbase) fprintf(fp, " vecbase"); + if (this->m_is_bool) fprintf(fp, " bool"); + if (this->m_is_like_size_t) fprintf(fp, " like-size_t"); + if (this->m_is_atomic) fprintf(fp, " atomic"); + fprintf(fp, "\n"); fflush(fp); } @@ -246,7 +296,8 @@ private: bool m_is_like_size_t; bool m_is_bool; size_t m_size; // Number of bytes of storage occupied by this type. - size_t m_value_size; // Number of bytes of value significant for this type. Differs for vec3. + size_t m_value_size; // Number of bytes of value significant for this type. + // Differs for vec3. // When passing values of this type to a kernel, what buffer type // should be used? 
@@ -256,46 +307,65 @@ private: }; -#define NUM_SCALAR_TYPES (8+2) // signed and unsigned integral types, float and double -#define NUM_VECTOR_SIZES (5) // 2,3,4,8,16 -#define NUM_PLAIN_TYPES \ - 5 /*boolean and size_t family */ \ - + NUM_SCALAR_TYPES \ - + NUM_SCALAR_TYPES*NUM_VECTOR_SIZES \ - + 10 /* atomic types */ +#define NUM_SCALAR_TYPES \ + (8 + 2) // signed and unsigned integral types, float and double +#define NUM_VECTOR_SIZES (5) // 2,3,4,8,16 +#define NUM_PLAIN_TYPES \ + 5 /*boolean and size_t family */ \ + + NUM_SCALAR_TYPES + NUM_SCALAR_TYPES* NUM_VECTOR_SIZES \ + + 10 /* atomic types */ // Need room for plain, array, pointer, struct -#define MAX_TYPES (4*NUM_PLAIN_TYPES) +#define MAX_TYPES (4 * NUM_PLAIN_TYPES) static TypeInfo type_info[MAX_TYPES]; static int num_type_info = 0; // Number of valid entries in type_info[] - - // A helper class to form kernel source arguments for clCreateProgramWithSource. class StringTable { public: - StringTable() : m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() {} + StringTable(): m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() + {} ~StringTable() { release_frozen(); } - void add(std::string s) { release_frozen(); m_strings.push_back(s); } + void add(std::string s) + { + release_frozen(); + m_strings.push_back(s); + } - const size_t num_str() { freeze(); return m_strings.size(); } - const char** strs() { freeze(); return m_c_strs; } - const size_t* lengths() { freeze(); return m_lengths; } + const size_t num_str() + { + freeze(); + return m_strings.size(); + } + const char** strs() + { + freeze(); + return m_c_strs; + } + const size_t* lengths() + { + freeze(); + return m_lengths; + } private: - void freeze(void) { - if ( !m_frozen ) { + void freeze(void) + { + if (!m_frozen) + { release_frozen(); - m_c_strs = (const char**) malloc(sizeof(const char*) * m_strings.size()); - m_lengths = (size_t*) malloc(sizeof(size_t) * m_strings.size()); - assert( m_c_strs ); - assert( m_lengths ); + 
m_c_strs = + (const char**)malloc(sizeof(const char*) * m_strings.size()); + m_lengths = (size_t*)malloc(sizeof(size_t) * m_strings.size()); + assert(m_c_strs); + assert(m_lengths); - for ( size_t i = 0; i < m_strings.size() ; i++ ) { + for (size_t i = 0; i < m_strings.size(); i++) + { m_c_strs[i] = m_strings[i].c_str(); m_lengths[i] = strlen(m_c_strs[i]); } @@ -303,9 +373,18 @@ private: m_frozen = true; } } - void release_frozen(void) { - if ( m_c_strs ) { free(m_c_strs); m_c_strs = 0; } - if ( m_lengths ) { free(m_lengths); m_lengths = 0; } + void release_frozen(void) + { + if (m_c_strs) + { + free(m_c_strs); + m_c_strs = 0; + } + if (m_lengths) + { + free(m_lengths); + m_lengths = 0; + } m_frozen = false; } @@ -325,11 +404,15 @@ static const char* l_get_fp64_pragma(void); static const char* l_get_cles_int64_pragma(void); static int l_build_type_table(cl_device_id device); -static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret); +static int l_get_device_info(cl_device_id device, size_t* max_size_ret, + size_t* pref_size_ret); -static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state ); -static int l_compare( const cl_uchar* expected, const cl_uchar* received, unsigned num_values, const TypeInfo&ti ); -static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti ); +static void l_set_randomly(cl_uchar* buf, size_t buf_size, + RandomSeed& rand_state); +static int l_compare(const cl_uchar* expected, const cl_uchar* received, + unsigned num_values, const TypeInfo& ti); +static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, + unsigned src_idx, const TypeInfo& ti); static std::string conversion_functions(const TypeInfo& ti); static std::string global_decls(const TypeInfo& ti, bool with_init); @@ -337,90 +420,123 @@ static std::string global_check_function(const TypeInfo& ti); static std::string writer_function(const TypeInfo& ti); 
static std::string reader_function(const TypeInfo& ti); -static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue ); -static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ); - -static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue ); -static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ); +static int l_write_read(cl_device_id device, cl_context context, + cl_command_queue queue); +static int l_write_read_for_type(cl_device_id device, cl_context context, + cl_command_queue queue, const TypeInfo& ti, + RandomSeed& rand_state); -static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size ); -static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size, bool separate_compilation ); +static int l_init_write_read(cl_device_id device, cl_context context, + cl_command_queue queue); +static int l_init_write_read_for_type(cl_device_id device, cl_context context, + cl_command_queue queue, + const TypeInfo& ti, + RandomSeed& rand_state); +static int l_capacity(cl_device_id device, cl_context context, + cl_command_queue queue, size_t max_size); +static int l_user_type(cl_device_id device, cl_context context, + cl_command_queue queue, size_t max_size, + bool separate_compilation); //////////////////// // File scope function definitions -static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device_id *device_list, cl_uint count, const char **strings, const size_t *lengths, const char* options) +static cl_int print_build_log(cl_program program, cl_uint num_devices, + cl_device_id* device_list, cl_uint count, + const char** strings, const size_t* lengths, + const char* options) { cl_uint i; cl_int error; BufferOwningPtr 
<cl_device_id> devices; - if(num_devices == 0 || device_list == NULL) + if (num_devices == 0 || device_list == NULL) { - error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL); + error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, + sizeof(num_devices), &num_devices, NULL); test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed"); - device_list = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices); + device_list = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devices); devices.reset(device_list); memset(device_list, 0, sizeof(cl_device_id) * num_devices); - error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * num_devices, device_list, NULL); + error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, + sizeof(cl_device_id) * num_devices, + device_list, NULL); test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed"); } cl_uint z; bool sourcePrinted = false; - for(z = 0; z < num_devices; z++) + for (z = 0; z < num_devices; z++) { char deviceName[4096] = ""; - error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); - check_error(error, "Device \"%d\" failed to return a name. clGetDeviceInfo CL_DEVICE_NAME failed", z); + error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, + sizeof(deviceName), deviceName, NULL); + check_error(error, + "Device \"%d\" failed to return a name. 
clGetDeviceInfo " + "CL_DEVICE_NAME failed", + z); cl_build_status buildStatus; - error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); - check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed"); + error = clGetProgramBuildInfo(program, device_list[z], + CL_PROGRAM_BUILD_STATUS, + sizeof(buildStatus), &buildStatus, NULL); + check_error(error, + "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed"); - if(buildStatus != CL_BUILD_SUCCESS) + if (buildStatus != CL_BUILD_SUCCESS) { - if(!sourcePrinted) + if (!sourcePrinted) { log_error("Build options: %s\n", options); - if(count && strings) + if (count && strings) { log_error("Original source is: ------------\n"); - for(i = 0; i < count; i++) log_error("%s", strings[i]); + for (i = 0; i < count; i++) log_error("%s", strings[i]); } sourcePrinted = true; } char statusString[64] = ""; if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS) - sprintf(statusString, "CL_BUILD_SUCCESS"); + sprintf(statusString, "CL_BUILD_SUCCESS"); else if (buildStatus == (cl_build_status)CL_BUILD_NONE) - sprintf(statusString, "CL_BUILD_NONE"); + sprintf(statusString, "CL_BUILD_NONE"); else if (buildStatus == (cl_build_status)CL_BUILD_ERROR) - sprintf(statusString, "CL_BUILD_ERROR"); + sprintf(statusString, "CL_BUILD_ERROR"); else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS) - sprintf(statusString, "CL_BUILD_IN_PROGRESS"); + sprintf(statusString, "CL_BUILD_IN_PROGRESS"); else - sprintf(statusString, "UNKNOWN (%d)", buildStatus); + sprintf(statusString, "UNKNOWN (%d)", buildStatus); - log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString); + log_error("Build not successful for device \"%s\", status: %s\n", + deviceName, statusString); size_t paramSize = 0; - error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, 0, NULL, &paramSize); - if(check_error(error, "clGetProgramBuildInfo 
CL_PROGRAM_BUILD_LOG failed")) break; + error = clGetProgramBuildInfo(program, device_list[z], + CL_PROGRAM_BUILD_LOG, 0, NULL, + &paramSize); + if (check_error( + error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed")) + break; std::string log; - log.resize(paramSize/sizeof(char)); - - error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL); - if(check_error(error, "Device %d (%s) failed to return a build log", z, deviceName)) break; - if(log[0] == 0) log_error("clGetProgramBuildInfo returned an empty log.\n"); + log.resize(paramSize / sizeof(char)); + + error = clGetProgramBuildInfo(program, device_list[z], + CL_PROGRAM_BUILD_LOG, paramSize, + &log[0], NULL); + if (check_error(error, + "Device %d (%s) failed to return a build log", z, + deviceName)) + break; + if (log[0] == 0) + log_error("clGetProgramBuildInfo returned an empty log.\n"); else { log_error("Build log:\n", deviceName); @@ -433,25 +549,29 @@ static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device static void l_load_abilities(cl_device_id device) { - l_has_half = is_extension_available(device,"cl_khr_fp16"); - l_has_double = is_extension_available(device,"cl_khr_fp64"); - l_has_cles_int64 = is_extension_available(device,"cles_khr_int64"); + l_has_half = is_extension_available(device, "cl_khr_fp16"); + l_has_double = is_extension_available(device, "cl_khr_fp64"); + l_has_cles_int64 = is_extension_available(device, "cles_khr_int64"); - l_has_int64_atomics - = is_extension_available(device,"cl_khr_int64_base_atomics") - && is_extension_available(device,"cl_khr_int64_extended_atomics"); + l_has_int64_atomics = + is_extension_available(device, "cl_khr_int64_base_atomics") + && is_extension_available(device, "cl_khr_int64_extended_atomics"); { int status = CL_SUCCESS; cl_uint addr_bits = 32; - status = clGetDeviceInfo(device,CL_DEVICE_ADDRESS_BITS,sizeof(addr_bits),&addr_bits,0); - l_64bit_device = ( status == CL_SUCCESS && addr_bits 
== 64 ); + status = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, + sizeof(addr_bits), &addr_bits, 0); + l_64bit_device = (status == CL_SUCCESS && addr_bits == 64); } // 32-bit devices always have intptr atomics. l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics; - union { char c[4]; int i; } probe; + union { + char c[4]; + int i; + } probe; probe.i = 1; l_host_is_big_endian = !probe.c[0]; @@ -459,33 +579,40 @@ static void l_load_abilities(cl_device_id device) { int status = CL_SUCCESS; cl_uint max_dim = 0; - status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(max_dim),&max_dim,0); - assert( status == CL_SUCCESS ); - assert( max_dim > 0 ); + status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(max_dim), &max_dim, 0); + assert(status == CL_SUCCESS); + assert(max_dim > 0); size_t max_id[3]; max_id[0] = 0; - status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,max_dim*sizeof(size_t),&max_id[0],0); - assert( status == CL_SUCCESS ); + status = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, + max_dim * sizeof(size_t), &max_id[0], 0); + assert(status == CL_SUCCESS); l_max_global_id0 = max_id[0]; } { // Is separate compilation supported? int status = CL_SUCCESS; l_linker_available = false; - status = clGetDeviceInfo(device,CL_DEVICE_LINKER_AVAILABLE,sizeof(l_linker_available),&l_linker_available,0); - assert( status == CL_SUCCESS ); + status = + clGetDeviceInfo(device, CL_DEVICE_LINKER_AVAILABLE, + sizeof(l_linker_available), &l_linker_available, 0); + assert(status == CL_SUCCESS); } } static const char* l_get_fp64_pragma(void) { - return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" : ""; + return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" + : ""; } static const char* l_get_cles_int64_pragma(void) { - return l_has_cles_int64 ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" : ""; + return l_has_cles_int64 + ? 
"#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" + : ""; } static const char* l_get_int64_atomic_pragma(void) @@ -500,89 +627,83 @@ static int l_build_type_table(cl_device_id device) size_t iscalar = 0; size_t ivecsize = 0; int vecsizes[] = { 2, 3, 4, 8, 16 }; - const char* vecbase[] = { - "uchar", "char", - "ushort", "short", - "uint", "int", - "ulong", "long", - "float", - "double" - }; - int vecbase_size[] = { - 1, 1, - 2, 2, - 4, 4, - 8, 8, - 4, - 8 - }; - const char* like_size_t[] = { - "intptr_t", - "uintptr_t", - "size_t", - "ptrdiff_t" - }; + const char* vecbase[] = { "uchar", "char", "ushort", "short", "uint", + "int", "ulong", "long", "float", "double" }; + int vecbase_size[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 }; + const char* like_size_t[] = { "intptr_t", "uintptr_t", "size_t", + "ptrdiff_t" }; const char* atomics[] = { - "atomic_int", "atomic_uint", - "atomic_long", "atomic_ulong", - "atomic_float", - "atomic_double", - }; - int atomics_size[] = { - 4, 4, - 8, 8, - 4, - 8 - }; - const char* intptr_atomics[] = { - "atomic_intptr_t", - "atomic_uintptr_t", - "atomic_size_t", - "atomic_ptrdiff_t" + "atomic_int", "atomic_uint", "atomic_long", + "atomic_ulong", "atomic_float", "atomic_double", }; + int atomics_size[] = { 4, 4, 8, 8, 4, 8 }; + const char* intptr_atomics[] = { "atomic_intptr_t", "atomic_uintptr_t", + "atomic_size_t", "atomic_ptrdiff_t" }; l_load_abilities(device); num_type_info = 0; // Boolean. - type_info[ num_type_info++ ] = TypeInfo( "bool" ).set_bool().set_size(1).set_buf_elem_type("uchar"); + type_info[num_type_info++] = + TypeInfo("bool").set_bool().set_size(1).set_buf_elem_type("uchar"); // Vector types, and the related scalar element types. 
- for ( iscalar=0; iscalar < sizeof(vecbase)/sizeof(vecbase[0]) ; ++iscalar ) { - if ( !gHasLong && strstr(vecbase[iscalar],"long") ) continue; - if ( !l_has_double && strstr(vecbase[iscalar],"double") ) continue; + for (iscalar = 0; iscalar < sizeof(vecbase) / sizeof(vecbase[0]); ++iscalar) + { + if (!gHasLong && strstr(vecbase[iscalar], "long")) continue; + if (!l_has_double && strstr(vecbase[iscalar], "double")) continue; // Scalar TypeInfo* elem_type = type_info + num_type_info++; - *elem_type = TypeInfo( vecbase[iscalar] ).set_vecbase().set_size( vecbase_size[iscalar] ); + *elem_type = TypeInfo(vecbase[iscalar]) + .set_vecbase() + .set_size(vecbase_size[iscalar]); // Vector - for ( ivecsize=0; ivecsize < sizeof(vecsizes)/sizeof(vecsizes[0]) ; ivecsize++ ) { - type_info[ num_type_info++ ] = TypeInfo( elem_type, vecsizes[ivecsize] ); + for (ivecsize = 0; ivecsize < sizeof(vecsizes) / sizeof(vecsizes[0]); + ivecsize++) + { + type_info[num_type_info++] = + TypeInfo(elem_type, vecsizes[ivecsize]); } } // Size_t-like types - for ( iscalar=0; iscalar < sizeof(like_size_t)/sizeof(like_size_t[0]) ; ++iscalar ) { - type_info[ num_type_info++ ] = TypeInfo( like_size_t[iscalar] ).set_like_size_t(); + for (iscalar = 0; iscalar < sizeof(like_size_t) / sizeof(like_size_t[0]); + ++iscalar) + { + type_info[num_type_info++] = + TypeInfo(like_size_t[iscalar]).set_like_size_t(); } // Atomic types. - for ( iscalar=0; iscalar < sizeof(atomics)/sizeof(atomics[0]) ; ++iscalar ) { - if ( !l_has_int64_atomics && strstr(atomics[iscalar],"long") ) continue; - if ( !(l_has_int64_atomics && l_has_double) && strstr(atomics[iscalar],"double") ) continue; + for (iscalar = 0; iscalar < sizeof(atomics) / sizeof(atomics[0]); ++iscalar) + { + if (!l_has_int64_atomics && strstr(atomics[iscalar], "long")) continue; + if (!(l_has_int64_atomics && l_has_double) + && strstr(atomics[iscalar], "double")) + continue; // The +7 is used to skip over the "atomic_" prefix. 
const char* buf_type = atomics[iscalar] + 7; - type_info[ num_type_info++ ] = TypeInfo( atomics[iscalar] ).set_atomic().set_size( atomics_size[iscalar] ).set_buf_elem_type( buf_type ); + type_info[num_type_info++] = TypeInfo(atomics[iscalar]) + .set_atomic() + .set_size(atomics_size[iscalar]) + .set_buf_elem_type(buf_type); } - if ( l_has_intptr_atomics ) { - for ( iscalar=0; iscalar < sizeof(intptr_atomics)/sizeof(intptr_atomics[0]) ; ++iscalar ) { - type_info[ num_type_info++ ] = TypeInfo( intptr_atomics[iscalar] ).set_atomic().set_like_size_t(); + if (l_has_intptr_atomics) + { + for (iscalar = 0; + iscalar < sizeof(intptr_atomics) / sizeof(intptr_atomics[0]); + ++iscalar) + { + type_info[num_type_info++] = TypeInfo(intptr_atomics[iscalar]) + .set_atomic() + .set_like_size_t(); } } - assert( num_type_info <= MAX_TYPES ); // or increase MAX_TYPES + assert(num_type_info <= MAX_TYPES); // or increase MAX_TYPES #if 0 for ( size_t i = 0 ; i < num_type_info ; i++ ) { @@ -594,7 +715,7 @@ static int l_build_type_table(cl_device_id device) return status; } -static const TypeInfo& l_find_type( const char* name ) +static const TypeInfo& l_find_type(const char* name) { auto itr = std::find_if(type_info, type_info + num_type_info, @@ -604,36 +725,54 @@ static const TypeInfo& l_find_type( const char* name ) } +// Populate return parameters for max program variable size, preferred program +// variable size. -// Populate return parameters for max program variable size, preferred program variable size. 
- -static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret) +static int l_get_device_info(cl_device_id device, size_t* max_size_ret, + size_t* pref_size_ret) { int err = CL_SUCCESS; size_t return_size = 0; - err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(*max_size_ret), max_size_ret, &return_size); - if ( err != CL_SUCCESS ) { - log_error("Error: Failed to get device info for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n"); + err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, + sizeof(*max_size_ret), max_size_ret, &return_size); + if (err != CL_SUCCESS) + { + log_error("Error: Failed to get device info for " + "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n"); return err; } - if ( return_size != sizeof(size_t) ) { - log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size ); + if (return_size != sizeof(size_t)) + { + log_error("Error: Invalid size %d returned for " + "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", + (int)return_size); return 1; } - if ( return_size != sizeof(size_t) ) { - log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size ); + if (return_size != sizeof(size_t)) + { + log_error("Error: Invalid size %d returned for " + "CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", + (int)return_size); return 1; } return_size = 0; - err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(*pref_size_ret), pref_size_ret, &return_size); - if ( err != CL_SUCCESS ) { - log_error("Error: Failed to get device info for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",err); + err = + clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, + sizeof(*pref_size_ret), pref_size_ret, &return_size); + if (err != CL_SUCCESS) + { + log_error("Error: Failed to get device info for " + "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n", + err); return err; } - if ( return_size != 
sizeof(size_t) ) { - log_error("Error: Invalid size %d returned for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", (int)return_size ); + if (return_size != sizeof(size_t)) + { + log_error("Error: Invalid size %d returned for " + "CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", + (int)return_size); return 1; } @@ -641,11 +780,13 @@ static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* } -static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state ) +static void l_set_randomly(cl_uchar* buf, size_t buf_size, + RandomSeed& rand_state) { - assert( 0 == (buf_size % sizeof(cl_uint) ) ); - for ( size_t i = 0; i < buf_size ; i += sizeof(cl_uint) ) { - *( (cl_uint*)(buf + i) ) = genrand_int32( rand_state ); + assert(0 == (buf_size % sizeof(cl_uint))); + for (size_t i = 0; i < buf_size; i += sizeof(cl_uint)) + { + *((cl_uint*)(buf + i)) = genrand_int32(rand_state); } #if 0 for ( size_t i = 0; i < buf_size ; i++ ) { @@ -657,20 +798,23 @@ static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_sta // Return num_value values of the given type. // Returns CL_SUCCESS if they compared as equal. -static int l_compare( const char* test_name, const cl_uchar* expected, const cl_uchar* received, size_t num_values, const TypeInfo&ti ) +static int l_compare(const char* test_name, const cl_uchar* expected, + const cl_uchar* received, size_t num_values, + const TypeInfo& ti) { // Compare only the valid returned bytes. 
- for ( unsigned value_idx = 0; value_idx < num_values; value_idx++ ) { + for (unsigned value_idx = 0; value_idx < num_values; value_idx++) + { const cl_uchar* expv = expected + value_idx * ti.get_size(); const cl_uchar* gotv = received + value_idx * ti.get_size(); - if ( memcmp( expv, gotv, ti.get_value_size() ) ) { - std::string exp_str = ti.as_string( expv ); - std::string got_str = ti.as_string( gotv ); - log_error("Error: %s test for type %s, at index %d: Expected %s got %s\n", - test_name, - ti.get_name_c_str(), value_idx, - exp_str.c_str(), - got_str.c_str() ); + if (memcmp(expv, gotv, ti.get_value_size())) + { + std::string exp_str = ti.as_string(expv); + std::string got_str = ti.as_string(gotv); + log_error( + "Error: %s test for type %s, at index %d: Expected %s got %s\n", + test_name, ti.get_name_c_str(), value_idx, exp_str.c_str(), + got_str.c_str()); return 1; } } @@ -678,11 +822,12 @@ static int l_compare( const char* test_name, const cl_uchar* expected, const cl_ } // Copy a target value from src[idx] to dest[idx] -static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti ) +static int l_copy(cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, + unsigned src_idx, const TypeInfo& ti) { - cl_uchar* raw_dest = dest + dest_idx * ti.get_size(); - const cl_uchar* raw_src = src + src_idx * ti.get_size(); - memcpy( raw_dest, raw_src, ti.get_value_size() ); + cl_uchar* raw_dest = dest + dest_idx * ti.get_size(); + const cl_uchar* raw_src = src + src_idx * ti.get_size(); + memcpy(raw_dest, raw_src, ti.get_value_size()); return 0; } @@ -694,59 +839,70 @@ static std::string conversion_functions(const TypeInfo& ti) static char buf[MAX_STR]; int num_printed = 0; // The atomic types just use the base type. 
- if ( ti.is_atomic() || 0 == strcmp( ti.get_buf_elem_type(), ti.get_name_c_str() ) ) { + if (ti.is_atomic() + || 0 == strcmp(ti.get_buf_elem_type(), ti.get_name_c_str())) + { // The type is represented in a buffer by itself. - num_printed = snprintf(buf,MAX_STR, - "%s from_buf(%s a) { return a; }\n" - "%s to_buf(%s a) { return a; }\n", - ti.get_buf_elem_type(), ti.get_buf_elem_type(), - ti.get_buf_elem_type(), ti.get_buf_elem_type() ); - } else { + num_printed = snprintf(buf, MAX_STR, + "%s from_buf(%s a) { return a; }\n" + "%s to_buf(%s a) { return a; }\n", + ti.get_buf_elem_type(), ti.get_buf_elem_type(), + ti.get_buf_elem_type(), ti.get_buf_elem_type()); + } + else + { // Just use C-style cast. - num_printed = snprintf(buf,MAX_STR, - "%s from_buf(%s a) { return (%s)a; }\n" - "%s to_buf(%s a) { return (%s)a; }\n", - ti.get_name_c_str(), ti.get_buf_elem_type(), ti.get_name_c_str(), - ti.get_buf_elem_type(), ti.get_name_c_str(), ti.get_buf_elem_type() ); + num_printed = snprintf(buf, MAX_STR, + "%s from_buf(%s a) { return (%s)a; }\n" + "%s to_buf(%s a) { return (%s)a; }\n", + ti.get_name_c_str(), ti.get_buf_elem_type(), + ti.get_name_c_str(), ti.get_buf_elem_type(), + ti.get_name_c_str(), ti.get_buf_elem_type()); } // Add initializations. - if ( ti.is_atomic() ) { - num_printed += snprintf( buf + num_printed, MAX_STR-num_printed, - "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n" ); - } else { + if (ti.is_atomic()) + { + num_printed += snprintf(buf + num_printed, MAX_STR - num_printed, + "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n"); + } + else + { // This cast works even if the target type is a vector type. 
- num_printed += snprintf( buf + num_printed, MAX_STR-num_printed, - "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str()); + num_printed += + snprintf(buf + num_printed, MAX_STR - num_printed, + "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str()); } - assert( num_printed < MAX_STR ); // or increase MAX_STR + assert(num_printed < MAX_STR); // or increase MAX_STR result = buf; return result; } -static std::string global_decls(const TypeInfo& ti, bool with_init ) +static std::string global_decls(const TypeInfo& ti, bool with_init) { const char* tn = ti.get_name_c_str(); const char* vol = (ti.is_atomic() ? " volatile " : " "); static char decls[MAX_STR]; int num_printed = 0; - if ( with_init ) { - const char *decls_template_with_init = + if (with_init) + { + const char* decls_template_with_init = "%s %s var = INIT_VAR(0);\n" "global %s %s g_var = INIT_VAR(1);\n" "%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n" "volatile global %s %s* p_var = &a_var[1];\n\n"; - num_printed = snprintf(decls,sizeof(decls),decls_template_with_init, - vol,tn,vol,tn,vol,tn,vol,tn); - } else { - const char *decls_template_no_init = - "%s %s var;\n" - "global %s %s g_var;\n" - "%s %s a_var[2];\n" - "global %s %s* p_var;\n\n"; - num_printed = snprintf(decls,sizeof(decls),decls_template_no_init, - vol,tn,vol,tn,vol,tn,vol,tn); - } - assert( num_printed < sizeof(decls) ); + num_printed = snprintf(decls, sizeof(decls), decls_template_with_init, + vol, tn, vol, tn, vol, tn, vol, tn); + } + else + { + const char* decls_template_no_init = "%s %s var;\n" + "global %s %s g_var;\n" + "%s %s a_var[2];\n" + "global %s %s* p_var;\n\n"; + num_printed = snprintf(decls, sizeof(decls), decls_template_no_init, + vol, tn, vol, tn, vol, tn, vol, tn); + } + assert(num_printed < sizeof(decls)); return std::string(decls); } @@ -761,18 +917,26 @@ static std::string global_check_function(const TypeInfo& ti) // all() should only be used on vector inputs. 
For scalar comparison, the // result of the equality operator can be used as a bool value. - const bool is_scalar = ti.num_elem() == 0; // 0 is used to represent scalar types, not 1. + const bool is_scalar = + ti.num_elem() == 0; // 0 is used to represent scalar types, not 1. const std::string is_equality_true = is_scalar ? "" : "all"; std::string code = "kernel void global_check(global int* out) {\n"; code += " const " + type_name + " zero = ((" + type_name + ")0);\n"; code += " bool status = true;\n"; - if (ti.is_atomic()) { - code += " status &= " + is_equality_true + "(atomic_load(&var) == zero);\n"; - code += " status &= " + is_equality_true + "(atomic_load(&g_var) == zero);\n"; - code += " status &= " + is_equality_true + "(atomic_load(&a_var[0]) == zero);\n"; - code += " status &= " + is_equality_true + "(atomic_load(&a_var[1]) == zero);\n"; - } else { + if (ti.is_atomic()) + { + code += " status &= " + is_equality_true + + "(atomic_load(&var) == zero);\n"; + code += " status &= " + is_equality_true + + "(atomic_load(&g_var) == zero);\n"; + code += " status &= " + is_equality_true + + "(atomic_load(&a_var[0]) == zero);\n"; + code += " status &= " + is_equality_true + + "(atomic_load(&a_var[1]) == zero);\n"; + } + else + { code += " status &= " + is_equality_true + "(var == zero);\n"; code += " status &= " + is_equality_true + "(g_var == zero);\n"; code += " status &= " + is_equality_true + "(a_var[0] == zero);\n"; @@ -792,7 +956,8 @@ static std::string writer_function(const TypeInfo& ti) { static char writer_src[MAX_STR]; int num_printed = 0; - if ( !ti.is_atomic() ) { + if (!ti.is_atomic()) + { const char* writer_template_normal = "kernel void writer( global %s* src, uint idx ) {\n" " var = from_buf(src[0]);\n" @@ -801,8 +966,11 @@ static std::string writer_function(const TypeInfo& ti) " a_var[1] = from_buf(src[3]);\n" " p_var = a_var + idx;\n" "}\n\n"; - num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_normal,ti.get_buf_elem_type()); - 
} else { + num_printed = snprintf(writer_src, sizeof(writer_src), + writer_template_normal, ti.get_buf_elem_type()); + } + else + { const char* writer_template_atomic = "kernel void writer( global %s* src, uint idx ) {\n" " atomic_store( &var, from_buf(src[0]) );\n" @@ -811,9 +979,10 @@ static std::string writer_function(const TypeInfo& ti) " atomic_store( &a_var[1], from_buf(src[3]) );\n" " p_var = a_var + idx;\n" "}\n\n"; - num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_atomic,ti.get_buf_elem_type()); + num_printed = snprintf(writer_src, sizeof(writer_src), + writer_template_atomic, ti.get_buf_elem_type()); } - assert( num_printed < sizeof(writer_src) ); + assert(num_printed < sizeof(writer_src)); std::string result = writer_src; return result; } @@ -826,7 +995,8 @@ static std::string reader_function(const TypeInfo& ti) { static char reader_src[MAX_STR]; int num_printed = 0; - if ( !ti.is_atomic() ) { + if (!ti.is_atomic()) + { const char* reader_template_normal = "kernel void reader( global %s* dest, %s ptr_write_val ) {\n" " *p_var = from_buf(ptr_write_val);\n" @@ -835,8 +1005,12 @@ static std::string reader_function(const TypeInfo& ti) " dest[2] = to_buf(a_var[0]);\n" " dest[3] = to_buf(a_var[1]);\n" "}\n\n"; - num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_normal,ti.get_buf_elem_type(),ti.get_buf_elem_type()); - } else { + num_printed = + snprintf(reader_src, sizeof(reader_src), reader_template_normal, + ti.get_buf_elem_type(), ti.get_buf_elem_type()); + } + else + { const char* reader_template_atomic = "kernel void reader( global %s* dest, %s ptr_write_val ) {\n" " atomic_store( p_var, from_buf(ptr_write_val) );\n" @@ -845,40 +1019,53 @@ static std::string reader_function(const TypeInfo& ti) " dest[2] = to_buf( atomic_load( &a_var[0] ) );\n" " dest[3] = to_buf( atomic_load( &a_var[1] ) );\n" "}\n\n"; - num_printed = 
snprintf(reader_src,sizeof(reader_src),reader_template_atomic,ti.get_buf_elem_type(),ti.get_buf_elem_type()); + num_printed = + snprintf(reader_src, sizeof(reader_src), reader_template_atomic, + ti.get_buf_elem_type(), ti.get_buf_elem_type()); } - assert( num_printed < sizeof(reader_src) ); + assert(num_printed < sizeof(reader_src)); std::string result = reader_src; return result; } // Check that all globals where appropriately default-initialized. -static int check_global_initialization(cl_context context, cl_program program, cl_command_queue queue) +static int check_global_initialization(cl_context context, cl_program program, + cl_command_queue queue) { int status = CL_SUCCESS; // Create a buffer on device to store a unique integer. cl_int is_init_valid = 0; - clMemWrapper buffer(clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(is_init_valid), &is_init_valid, &status)); + clMemWrapper buffer( + clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(is_init_valid), &is_init_valid, &status)); test_error_ret(status, "Failed to allocate buffer", status); // Create, setup and invoke kernel. 
- clKernelWrapper global_check(clCreateKernel(program, "global_check", &status)); + clKernelWrapper global_check( + clCreateKernel(program, "global_check", &status)); test_error_ret(status, "Failed to create global_check kernel", status); status = clSetKernelArg(global_check, 0, sizeof(cl_mem), &buffer); - test_error_ret(status, "Failed to set up argument for the global_check kernel", status); + test_error_ret(status, + "Failed to set up argument for the global_check kernel", + status); const cl_uint work_dim = 1; const size_t global_work_offset[] = { 0 }; const size_t global_work_size[] = { 1 }; - status = clEnqueueNDRangeKernel(queue, global_check, work_dim, global_work_offset, global_work_size, nullptr, 0, nullptr, nullptr); + status = clEnqueueNDRangeKernel(queue, global_check, work_dim, + global_work_offset, global_work_size, + nullptr, 0, nullptr, nullptr); test_error_ret(status, "Failed to run global_check kernel", status); status = clFinish(queue); test_error_ret(status, "clFinish() failed", status); // Read back the memory buffer from the device. - status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid), &is_init_valid, 0, nullptr, nullptr); + status = + clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid), + &is_init_valid, 0, nullptr, nullptr); test_error_ret(status, "Failed to read buffer from device", status); - if (is_init_valid == 0) { + if (is_init_valid == 0) + { log_error("Unexpected default values were detected"); return 1; } @@ -887,58 +1074,75 @@ static int check_global_initialization(cl_context context, cl_program program, c } // Check write-then-read. 
-static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue ) +static int l_write_read(cl_device_id device, cl_context context, + cl_command_queue queue) { int status = CL_SUCCESS; int itype; - RandomSeed rand_state( gRandomSeed ); + RandomSeed rand_state(gRandomSeed); - for ( itype = 0; itype < num_type_info ; itype++ ) { - status = status | l_write_read_for_type(device,context,queue,type_info[itype], rand_state ); + for (itype = 0; itype < num_type_info; itype++) + { + status = status + | l_write_read_for_type(device, context, queue, type_info[itype], + rand_state); FLUSH; } return status; } -static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ) +static int l_write_read_for_type(cl_device_id device, cl_context context, + cl_command_queue queue, const TypeInfo& ti, + RandomSeed& rand_state) { int err = CL_SUCCESS; - std::string type_name( ti.get_name() ); + std::string type_name(ti.get_name()); const char* tn = type_name.c_str(); - log_info(" %s ",tn); + log_info(" %s ", tn); StringTable ksrc; - ksrc.add( l_get_fp64_pragma() ); - ksrc.add( l_get_cles_int64_pragma() ); - if (ti.is_atomic_64bit()) - ksrc.add( l_get_int64_atomic_pragma() ); - ksrc.add( conversion_functions(ti) ); - ksrc.add( global_decls(ti,false) ); - ksrc.add( global_check_function(ti) ); - ksrc.add( writer_function(ti) ); - ksrc.add( reader_function(ti) ); + ksrc.add(l_get_fp64_pragma()); + ksrc.add(l_get_cles_int64_pragma()); + if (ti.is_atomic_64bit()) ksrc.add(l_get_int64_atomic_pragma()); + ksrc.add(conversion_functions(ti)); + ksrc.add(global_decls(ti, false)); + ksrc.add(global_check_function(ti)); + ksrc.add(writer_function(ti)); + ksrc.add(reader_function(ti)); int status = CL_SUCCESS; clProgramWrapper program; clKernelWrapper writer; - status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS); - 
test_error_ret(status,"Failed to create program for read-after-write test",status); + status = create_single_kernel_helper_with_build_options( + context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", + OPTIONS); + test_error_ret(status, "Failed to create program for read-after-write test", + status); - clKernelWrapper reader( clCreateKernel( program, "reader", &status ) ); - test_error_ret(status,"Failed to create reader kernel for read-after-write test",status); + clKernelWrapper reader(clCreateKernel(program, "reader", &status)); + test_error_ret(status, + "Failed to create reader kernel for read-after-write test", + status); // Check size query. size_t used_bytes = 0; - status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 ); - test_error_ret(status,"Failed to query global variable total size",status); - size_t expected_used_bytes = - (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements. - + ( l_64bit_device ? 8 : 4 ); // The pointer - if ( used_bytes < expected_used_bytes ) { - log_error("Error program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes ); + status = clGetProgramBuildInfo(program, device, + CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, + sizeof(used_bytes), &used_bytes, 0); + test_error_ret(status, "Failed to query global variable total size", + status); + size_t expected_used_bytes = (NUM_TESTED_VALUES - 1) + * ti.get_size() // Two regular variables and an array of 2 elements. + + (l_64bit_device ? 
8 : 4); // The pointer + if (used_bytes < expected_used_bytes) + { + log_error("Error program query for global variable total size query " + "failed: Expected at least %llu but got %llu\n", + (unsigned long long)expected_used_bytes, + (unsigned long long)used_bytes); err |= 1; } @@ -951,90 +1155,131 @@ static int l_write_read_for_type( cl_device_id device, cl_context context, cl_co cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT); cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT); - clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) ); - test_error_ret(status,"Failed to allocate write buffer",status); - clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) ); - test_error_ret(status,"Failed to allocate read buffer",status); + clMemWrapper write_mem(clCreateBuffer( + context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status)); + test_error_ret(status, "Failed to allocate write buffer", status); + clMemWrapper read_mem(clCreateBuffer(context, CL_MEM_USE_HOST_PTR, + read_data_size, read_data, &status)); + test_error_ret(status, "Failed to allocate read buffer", status); - status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); - status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status); + status = clSetKernelArg(writer, 0, sizeof(cl_mem), &write_mem); + test_error_ret(status, "set arg", status); + status = clSetKernelArg(reader, 0, sizeof(cl_mem), &read_mem); + test_error_ret(status, "set arg", status); // Boolean random data needs to be massaged a bit more. - const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS; + const int num_rounds = ti.is_bool() ? 
(1 << NUM_TESTED_VALUES) : NUM_ROUNDS; unsigned bool_iter = 0; - for ( int iround = 0; iround < num_rounds ; iround++ ) { - for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer + for (int iround = 0; iround < num_rounds; iround++) + { + for (cl_uint iptr_idx = 0; iptr_idx < 2; iptr_idx++) + { // Index into array, to write via pointer // Generate new random data to push through. - // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. + // Generate 5 * 128 bytes all the time, even though the test for + // many types use less than all that. - cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0); + cl_uchar* write_ptr = (cl_uchar*)clEnqueueMapBuffer( + queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, + 0, 0, 0); - if ( ti.is_bool() ) { + if (ti.is_bool()) + { // For boolean, random data cast to bool isn't very random. // So use the bottom bit of bool_value_iter to get true // diversity. - for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) { - write_data[value_idx] = (1< Date: Thu, 13 Oct 2022 10:02:40 +0100 Subject: harness: Fix -Wformat warnings (#1527) The main sources of warnings were: * Printing of a `size_t` which requires the `%zu` specifier. * Printing of `cl_long`/`cl_ulong` which is now done using the `PRI*64` macros to ensure portability across 32 and 64-bit builds. 
Signed-off-by: Sven van Haastregt Signed-off-by: Sven van Haastregt --- test_common/harness/conversions.cpp | 5 +++-- test_common/harness/imageHelpers.cpp | 7 ++++--- test_common/harness/propertyHelpers.cpp | 11 ++++++----- test_common/harness/testHarness.cpp | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/test_common/harness/conversions.cpp b/test_common/harness/conversions.cpp index c7731269..d52a2ac6 100644 --- a/test_common/harness/conversions.cpp +++ b/test_common/harness/conversions.cpp @@ -14,6 +14,7 @@ // limitations under the License. // #include "conversions.h" +#include #include #include #include @@ -50,10 +51,10 @@ void print_type_to_string(ExplicitType type, void *data, char *string) case kInt: sprintf(string, "%d", *((cl_int *)data)); return; case kUInt: case kUnsignedInt: sprintf(string, "%u", *((cl_uint *)data)); return; - case kLong: sprintf(string, "%lld", *((cl_long *)data)); return; + case kLong: sprintf(string, "%" PRId64 "", *((cl_long *)data)); return; case kULong: case kUnsignedLong: - sprintf(string, "%llu", *((cl_ulong *)data)); + sprintf(string, "%" PRIu64 "", *((cl_ulong *)data)); return; case kFloat: sprintf(string, "%f", *((cl_float *)data)); return; case kHalf: sprintf(string, "half"); return; diff --git a/test_common/harness/imageHelpers.cpp b/test_common/harness/imageHelpers.cpp index 3dbdffa0..f1694e88 100644 --- a/test_common/harness/imageHelpers.cpp +++ b/test_common/harness/imageHelpers.cpp @@ -23,6 +23,7 @@ #include #endif #include +#include #include #if !defined(_WIN32) #include @@ -421,7 +422,7 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel, (int)thirdDim, (int)imageInfo->rowPitch, (int)imageInfo->rowPitch - (int)imageInfo->width * (int)pixel_size); - log_error("Failed at column: %ld ", where); + log_error("Failed at column: %zu ", where); switch (pixel_size) { @@ -454,7 +455,7 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel, ((cl_ushort 
*)destPixel)[1], ((cl_ushort *)destPixel)[2]); break; case 8: - log_error("*0x%16.16llx vs. 0x%16.16llx\n", + log_error("*0x%16.16" PRIx64 " vs. 0x%16.16" PRIx64 "\n", ((cl_ulong *)sourcePixel)[0], ((cl_ulong *)destPixel)[0]); break; case 12: @@ -473,7 +474,7 @@ void print_first_pixel_difference_error(size_t where, const char *sourcePixel, ((cl_uint *)destPixel)[2], ((cl_uint *)destPixel)[3]); break; default: - log_error("Don't know how to print pixel size of %ld\n", + log_error("Don't know how to print pixel size of %zu\n", pixel_size); break; } diff --git a/test_common/harness/propertyHelpers.cpp b/test_common/harness/propertyHelpers.cpp index e368f9b6..6a10c076 100644 --- a/test_common/harness/propertyHelpers.cpp +++ b/test_common/harness/propertyHelpers.cpp @@ -19,6 +19,7 @@ #include #include +#include #include static bool findProperty(const std::vector& props, @@ -97,16 +98,16 @@ int compareProperties(const std::vector& queried, if (!found) { - log_error("ERROR: expected property 0x%llx not found!\n", + log_error("ERROR: expected property 0x%" PRIx64 " not found!\n", check_prop); return TEST_FAIL; } else if (check_value != queried_value) { - log_error( - "ERROR: mis-matched value for property 0x%llx: wanted " - "0x%llx, got 0x%llx\n", - check_prop, check_value, queried_value); + log_error("ERROR: mis-matched value for property 0x%" PRIx64 + ": wanted " + "0x%" PRIx64 ", got 0x%" PRIx64 "\n", + check_prop, check_value, queried_value); return TEST_FAIL; } } diff --git a/test_common/harness/testHarness.cpp b/test_common/harness/testHarness.cpp index d07d982c..a309f53d 100644 --- a/test_common/harness/testHarness.cpp +++ b/test_common/harness/testHarness.cpp @@ -1198,7 +1198,7 @@ cl_platform_id getPlatformFromDevice(cl_device_id deviceID) void PrintArch(void) { - vlog("sizeof( void*) = %ld\n", sizeof(void *)); + vlog("sizeof( void*) = %zu\n", sizeof(void *)); #if defined(__ppc__) vlog("ARCH:\tppc\n"); #elif defined(__ppc64__) -- cgit v1.2.3 From 
5e116e7b0d7fe29f637cdd4cff87ff996d91cb22 Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Fri, 14 Oct 2022 09:53:33 +0100 Subject: Use CTS type wrappers for test_sizeof. (#1547) Signed-off-by: John Kesapides Signed-off-by: John Kesapides --- test_conformance/basic/test_sizeof.cpp | 39 +++++++--------------------------- 1 file changed, 8 insertions(+), 31 deletions(-) diff --git a/test_conformance/basic/test_sizeof.cpp b/test_conformance/basic/test_sizeof.cpp index 6b1ddb56..e980ed68 100644 --- a/test_conformance/basic/test_sizeof.cpp +++ b/test_conformance/basic/test_sizeof.cpp @@ -35,9 +35,9 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty "}\n" }; - cl_program p; - cl_kernel k; - cl_mem m; + clProgramWrapper p; + clKernelWrapper k; + clMemWrapper m; cl_uint temp; @@ -51,42 +51,19 @@ cl_int get_type_size( cl_context context, cl_command_queue queue, const char *ty } cl_int err = create_single_kernel_helper_with_build_options( context, &p, &k, 4, sizeof_kernel_code, "test_sizeof", nullptr); - if( err ) - return err; + test_error(err, "Failed to build kernel/program."); m = clCreateBuffer( context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof( cl_ulong ), size, &err ); - if( NULL == m ) - { - clReleaseProgram( p ); - clReleaseKernel( k ); - log_error("\nclCreateBuffer FAILED\n"); - return err; - } + test_error(err, "clCreateBuffer failed."); err = clSetKernelArg( k, 0, sizeof( cl_mem ), &m ); - if( err ) - { - clReleaseProgram( p ); - clReleaseKernel( k ); - clReleaseMemObject( m ); - log_error("\nclSetKernelArg FAILED\n"); - return err; - } + test_error(err, "clSetKernelArg failed."); err = clEnqueueTask( queue, k, 0, NULL, NULL ); - clReleaseProgram( p ); - clReleaseKernel( k ); - if( err ) - { - clReleaseMemObject( m ); - log_error( "\nclEnqueueTask FAILED\n" ); - return err; - } + test_error(err, "clEnqueueTask failed."); err = clEnqueueReadBuffer( queue, m, CL_TRUE, 0, 
sizeof( cl_uint ), &temp, 0, NULL, NULL ); - clReleaseMemObject( m ); - if( err ) - log_error( "\nclEnqueueReadBuffer FAILED\n" ); + test_error(err, "clEnqueueReadBuffer failed."); *size = (cl_ulong) temp; -- cgit v1.2.3 From 90a5183ec499d5b4701f58f6134dd424d82c4dca Mon Sep 17 00:00:00 2001 From: John Kesapides <46718829+JohnKesapidesARM@users.noreply.github.com> Date: Fri, 14 Oct 2022 09:55:10 +0100 Subject: Use CTS type wrappers for test_enqueued_local_size (#1544) Signed-off-by: John Kesapides Signed-off-by: John Kesapides --- .../basic/test_enqueued_local_size.cpp | 122 ++++++++++----------- 1 file changed, 59 insertions(+), 63 deletions(-) diff --git a/test_conformance/basic/test_enqueued_local_size.cpp b/test_conformance/basic/test_enqueued_local_size.cpp index 91fe1434..ea95df68 100644 --- a/test_conformance/basic/test_enqueued_local_size.cpp +++ b/test_conformance/basic/test_enqueued_local_size.cpp @@ -1,6 +1,6 @@ // // Copyright (c) 2017 The Khronos Group Inc. -// +// // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at @@ -26,32 +26,33 @@ #include "procs.h" -static const char *enqueued_local_size_2d_code = -"__kernel void test_enqueued_local_size_2d(global int *dst)\n" -"{\n" -" if ((get_global_id(0) == 0) && (get_global_id(1) == 0))\n" -" {\n" -" dst[0] = (int)get_enqueued_local_size(0)\n;" -" dst[1] = (int)get_enqueued_local_size(1)\n;" -" }\n" -"}\n"; - -static const char *enqueued_local_size_1d_code = -"__kernel void test_enqueued_local_size_1d(global int *dst)\n" -"{\n" -" int tid_x = get_global_id(0);\n" -" if (get_global_id(0) == 0)\n" -" {\n" -" dst[tid_x] = (int)get_enqueued_local_size(0)\n;" -" }\n" -"}\n"; - - -static int -verify_enqueued_local_size(int *result, size_t *expected, int n) +static const char *enqueued_local_size_2d_code = R"( +__kernel void test_enqueued_local_size_2d(global int *dst) +{ + if ((get_global_id(0) == 0) && (get_global_id(1) == 0)) + { + dst[0] = (int)get_enqueued_local_size(0); + dst[1] = (int)get_enqueued_local_size(1); + } +} +)"; + +static const char *enqueued_local_size_1d_code = R"( +__kernel void test_enqueued_local_size_1d(global int *dst) +{ + int tid_x = get_global_id(0); + if (get_global_id(0) == 0) + { + dst[tid_x] = (int)get_enqueued_local_size(0); + } +} +)"; + + +static int verify_enqueued_local_size(int *result, size_t *expected, int n) { int i; - for (i=0; i(num_elements); + globalsize[1] = static_cast(num_elements); size_t max_wgs; - err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_wgs), &max_wgs, NULL); - test_error( err, "clGetDeviceInfo failed."); + err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(max_wgs), &max_wgs, nullptr); + test_error(err, "clGetDeviceInfo failed."); localsize[0] = std::min(16, max_wgs); localsize[1] = std::min(11, max_wgs / localsize[0]); @@ -143,35 +143,31 @@ test_enqueued_local_size(cl_device_id device, cl_context context, cl_command_que } } - err = clEnqueueNDRangeKernel(queue, kernel[1], 2, NULL, globalsize, 
localsize, 0, NULL, NULL); - test_error( err, "clEnqueueNDRangeKernel failed."); + err = clEnqueueNDRangeKernel(queue, kernel[1], 2, nullptr, globalsize, + localsize, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed."); - err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL); - test_error( err, "clEnqueueReadBuffer failed."); + err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(int), + output_ptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed."); err = verify_enqueued_local_size(output_ptr, localsize, 2); - globalsize[0] = (size_t)num_elements; + globalsize[0] = static_cast(num_elements); localsize[0] = 9; if (use_uniform_work_groups && (globalsize[0] % localsize[0])) { globalsize[0] += (localsize[0] - (globalsize[0] % localsize[0])); } - err = clEnqueueNDRangeKernel(queue, kernel[1], 1, NULL, globalsize, localsize, 0, NULL, NULL); - test_error( err, "clEnqueueNDRangeKernel failed."); + err = clEnqueueNDRangeKernel(queue, kernel[1], 1, nullptr, globalsize, + localsize, 0, nullptr, nullptr); + test_error(err, "clEnqueueNDRangeKernel failed."); - err = clEnqueueReadBuffer(queue, streams, CL_TRUE, 0, 2*sizeof(int), output_ptr, 0, NULL, NULL); - test_error( err, "clEnqueueReadBuffer failed."); + err = clEnqueueReadBuffer(queue, stream, CL_BLOCKING, 0, 2 * sizeof(int), + output_ptr, 0, nullptr, nullptr); + test_error(err, "clEnqueueReadBuffer failed."); err = verify_enqueued_local_size(output_ptr, localsize, 1); - // cleanup - clReleaseMemObject(streams); - clReleaseKernel(kernel[0]); - clReleaseKernel(kernel[1]); - clReleaseProgram(program[0]); - clReleaseProgram(program[1]); - free(output_ptr); - return err; } -- cgit v1.2.3