1 files changed, 184 insertions, 210 deletions
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
index d09915f6..412f210b 100644
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp
@@ -14,14 +14,18 @@
 // limitations under the License.
 //
 
+#include "common.h"
 #include "function_list.h"
 #include "test_functions.h"
 #include "utility.h"
 
+#include <cinttypes>
 #include <cstring>
 
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
-                       cl_kernel *k, cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+                cl_kernel *k, cl_program *p, bool relaxedMode)
 {
     const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
                         "__kernel void math_kernel",
@@ -107,54 +111,55 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
                        relaxedMode);
 }
 
-typedef struct BuildKernelInfo
-{
-    cl_uint offset; // the first vector size to build
-    cl_uint kernel_count;
-    cl_kernel **kernels;
-    cl_program *programs;
-    const char *nameInCode;
-    bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
 {
     BuildKernelInfo *info = (BuildKernelInfo *)p;
-    cl_uint i = info->offset + job_id;
-    return BuildKernel(info->nameInCode, i, info->kernel_count,
-                       info->kernels[i], info->programs + i, info->relaxedMode);
+    cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+    return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+                       info->kernels[vectorSize].data(),
+                       &(info->programs[vectorSize]), info->relaxedMode);
 }
 
 // Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
 {
-    cl_mem inBuf; // input buffer for the thread
-    cl_mem inBuf2; // input buffer for the thread
-    cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
-    MTdata d;
-    cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
+    // Input and output buffers for the thread
+    clMemWrapper inBuf;
+    clMemWrapper inBuf2;
+    Buffers outBuf;
+
+    MTdataHolder d;
+
+    // Per thread command queue to improve performance
+    clCommandQueueWrapper tQueue;
+};
+
+struct TestInfo
 {
     size_t subBufferSize; // Size of the sub-buffer in elements
     const Func *f; // A pointer to the function info
-    cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
-    cl_kernel
-        *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
-                               // worker thread:  k[vector_size][thread_id]
-    ThreadInfo *
-        tinfo; // An array of thread specific information for each worker thread
+
+    // Programs for various vector sizes.
+    Programs programs;
+
+    // Thread-specific kernels for each vector size:
+    // k[vector_size][thread_id]
+    KernelMatrix k;
+
+    // Array of thread specific information
+    std::vector<ThreadInfo> tinfo;
+
     cl_uint threadCount; // Number of worker threads
     cl_uint jobCount; // Number of jobs
     cl_uint step; // step between each chunk and the next.
     cl_uint scale; // stride between individual test values
     int ftz; // non-zero if running in flush to zero mode
-
-} TestInfo;
+    bool relaxedMode; // True if test is running in relaxed mode, false
+                      // otherwise.
+};
 
 // A table of more difficult cases to get right
-static const double specialValues[] = {
+const double specialValues[] = {
     -NAN,
     -INFINITY,
     -DBL_MAX,
@@ -264,182 +269,19 @@ static const double specialValues[] = {
     +0.0,
 };
 
-static const size_t specialValuesCount =
+constexpr size_t specialValuesCount =
     sizeof(specialValues) / sizeof(specialValues[0]);
 
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 {
-    TestInfo test_info;
-    cl_int error;
-
-    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
-    // Init test_info
-    memset(&test_info, 0, sizeof(test_info));
-    test_info.threadCount = GetThreadCount();
-    test_info.subBufferSize = BUFFER_SIZE
-        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
-    test_info.scale = getTestScale(sizeof(cl_double));
-
-    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
-    if (test_info.step / test_info.subBufferSize != test_info.scale)
-    {
-        // there was overflow
-        test_info.jobCount = 1;
-    }
-    else
-    {
-        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
-    }
-
-    test_info.f = f;
-    test_info.ftz = f->ftz || gForceFTZ;
-
-    // cl_kernels aren't thread safe, so we make one for each vector size for
-    // every thread
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        size_t array_size = test_info.threadCount * sizeof(cl_kernel);
-        test_info.k[i] = (cl_kernel *)malloc(array_size);
-        if (NULL == test_info.k[i])
-        {
-            vlog_error("Error: Unable to allocate storage for kernels!\n");
-            error = CL_OUT_OF_HOST_MEMORY;
-            goto exit;
-        }
-        memset(test_info.k[i], 0, array_size);
-    }
-    test_info.tinfo =
-        (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
-    if (NULL == test_info.tinfo)
-    {
-        vlog_error(
-            "Error: Unable to allocate storage for thread specific data.\n");
-        error = CL_OUT_OF_HOST_MEMORY;
-        goto exit;
-    }
-    memset(test_info.tinfo, 0,
-           test_info.threadCount * sizeof(*test_info.tinfo));
-    for (size_t i = 0; i < test_info.threadCount; i++)
-    {
-        cl_buffer_region region = {
-            i * test_info.subBufferSize * sizeof(cl_double),
-            test_info.subBufferSize * sizeof(cl_double)
-        };
-        test_info.tinfo[i].inBuf =
-            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-        test_info.tinfo[i].inBuf2 =
-            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
-                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
-        if (error || NULL == test_info.tinfo[i].inBuf2)
-        {
-            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
-                       "region {%zd, %zd}\n",
-                       region.origin, region.size);
-            goto exit;
-        }
-
-        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-        {
-            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
-                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
-                &region, &error);
-            if (error || NULL == test_info.tinfo[i].outBuf[j])
-            {
-                vlog_error("Error: Unable to create sub-buffer of "
-                           "gOutBuffer[%d] for region {%zd, %zd}\n",
-                           (int)j, region.origin, region.size);
-                goto exit;
-            }
-        }
-        test_info.tinfo[i].tQueue =
-            clCreateCommandQueue(gContext, gDevice, 0, &error);
-        if (NULL == test_info.tinfo[i].tQueue || error)
-        {
-            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
-            goto exit;
-        }
-
-        test_info.tinfo[i].d = init_genrand(genrand_int32(d));
-    }
-
-    // Init the kernels
-    {
-        BuildKernelInfo build_info = {
-            gMinVectorSizeIndex, test_info.threadCount, test_info.k,
-            test_info.programs,  f->nameInCode,         relaxedMode
-        };
-        if ((error = ThreadPool_Do(BuildKernelFn,
-                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
-                                   &build_info)))
-            goto exit;
-    }
-
-    // Run the kernels
-    if (!gSkipCorrectnessTesting)
-    {
-        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
-        if (error) goto exit;
-
-        if (gWimpyMode)
-            vlog("Wimp pass");
-        else
-            vlog("passed");
-    }
-
-    vlog("\n");
-
-exit:
-    // Release
-    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
-    {
-        clReleaseProgram(test_info.programs[i]);
-        if (test_info.k[i])
-        {
-            for (cl_uint j = 0; j < test_info.threadCount; j++)
-                clReleaseKernel(test_info.k[i][j]);
-
-            free(test_info.k[i]);
-        }
-    }
-    if (test_info.tinfo)
-    {
-        for (cl_uint i = 0; i < test_info.threadCount; i++)
-        {
-            free_mtdata(test_info.tinfo[i].d);
-            clReleaseMemObject(test_info.tinfo[i].inBuf);
-            clReleaseMemObject(test_info.tinfo[i].inBuf2);
-            for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
-                clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
-            clReleaseCommandQueue(test_info.tinfo[i].tQueue);
-        }
-
-        free(test_info.tinfo);
-    }
-
-    return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
-    const TestInfo *job = (const TestInfo *)data;
+    TestInfo *job = (TestInfo *)data;
     size_t buffer_elements = job->subBufferSize;
     size_t buffer_size = buffer_elements * sizeof(cl_double);
     cl_uint base = job_id * (cl_uint)job->step;
-    ThreadInfo *tinfo = job->tinfo + thread_id;
+    ThreadInfo *tinfo = &(job->tinfo[thread_id]);
     dptr dfunc = job->f->dfunc;
     int ftz = job->ftz;
+    bool relaxedMode = job->relaxedMode;
     MTdata d = tinfo->d;
     cl_int error;
     const char *name = job->f->name;
@@ -538,7 +380,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
                                              out[j], 0, NULL, NULL)))
         {
-            vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+            vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+                       error);
             goto exit;
         }
 
@@ -613,7 +456,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         if (gMinVectorSizeIndex == 0 && t[j] != q[j])
         {
             // If we aren't getting the correctly rounded result
-            if (ftz)
+            if (ftz || relaxedMode)
             {
                 if (IsDoubleSubnormal(s[j]))
                 {
@@ -645,8 +488,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
             cl_ulong err = t[j] - q[j];
             if (q[j] > t[j]) err = q[j] - t[j];
-            vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld "
-                       "vs. %lld  (index: %d)\n",
+            vlog_error("\nERROR: %s: %" PRId64
+                       " ulp error at {%.13la, %.13la}: *%" PRId64 " "
+                       "vs. %" PRId64 "  (index: %zu)\n",
                        name, err, ((double *)s)[j], ((double *)s2)[j], t[j],
                        q[j], j);
             error = -1;
@@ -654,13 +498,14 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
         }
 
 
-        for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
+        for (auto k = std::max(1U, gMinVectorSizeIndex);
+             k < gMaxVectorSizeIndex; k++)
         {
             q = (cl_long *)out[k];
             // If we aren't getting the correctly rounded result
             if (-t[j] != q[j])
             {
-                if (ftz)
+                if (ftz || relaxedMode)
                 {
                     if (IsDoubleSubnormal(s[j]))
                     {
@@ -692,8 +537,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 
                 cl_ulong err = -t[j] - q[j];
                 if (q[j] > -t[j]) err = q[j] + t[j];
-                vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, "
-                           "%.13la}: *%lld vs. %lld  (index: %d)\n",
+                vlog_error("\nERROR: %sD%s: %" PRId64 " ulp error at {%.13la, "
+                           "%.13la}: *%" PRId64 " vs. %" PRId64
+                           "  (index: %zu)\n",
                            name, sizeNames[k], err, ((double *)s)[j],
                            ((double *)s2)[j], -t[j], q[j], j);
                 error = -1;
@@ -735,3 +581,131 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
 exit:
     return error;
 }
+
+} // anonymous namespace
+
+int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+{
+    TestInfo test_info{};
+    cl_int error;
+
+    logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+
+    // Init test_info
+    test_info.threadCount = GetThreadCount();
+    test_info.subBufferSize = BUFFER_SIZE
+        / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+    test_info.scale = getTestScale(sizeof(cl_double));
+
+    test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+    if (test_info.step / test_info.subBufferSize != test_info.scale)
+    {
+        // there was overflow
+        test_info.jobCount = 1;
+    }
+    else
+    {
+        test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+    }
+
+    test_info.f = f;
+    test_info.ftz = f->ftz || gForceFTZ;
+    test_info.relaxedMode = relaxedMode;
+
+    // cl_kernels aren't thread safe, so we make one for each vector size for
+    // every thread
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    {
+        test_info.k[i].resize(test_info.threadCount, nullptr);
+    }
+
+    test_info.tinfo.resize(test_info.threadCount);
+    for (cl_uint i = 0; i < test_info.threadCount; i++)
+    {
+        cl_buffer_region region = {
+            i * test_info.subBufferSize * sizeof(cl_double),
+            test_info.subBufferSize * sizeof(cl_double)
+        };
+        test_info.tinfo[i].inBuf =
+            clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+                       "region {%zd, %zd}\n",
+                       region.origin, region.size);
+            goto exit;
+        }
+        test_info.tinfo[i].inBuf2 =
+            clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+                              CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+        if (error || NULL == test_info.tinfo[i].inBuf2)
+        {
+            vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+                       "region {%zd, %zd}\n",
+                       region.origin, region.size);
+            goto exit;
+        }
+
+        for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+        {
+            test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+                gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+                &region, &error);
+            if (error || NULL == test_info.tinfo[i].outBuf[j])
+            {
+                vlog_error("Error: Unable to create sub-buffer of "
+                           "gOutBuffer[%d] for region {%zd, %zd}\n",
+                           (int)j, region.origin, region.size);
+                goto exit;
+            }
+        }
+        test_info.tinfo[i].tQueue =
+            clCreateCommandQueue(gContext, gDevice, 0, &error);
+        if (NULL == test_info.tinfo[i].tQueue || error)
+        {
+            vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+            goto exit;
+        }
+
+        test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+    }
+
+    // Init the kernels
+    {
+        BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+                                    test_info.programs, f->nameInCode,
+                                    relaxedMode };
+        if ((error = ThreadPool_Do(BuildKernelFn,
+                                   gMaxVectorSizeIndex - gMinVectorSizeIndex,
+                                   &build_info)))
+            goto exit;
+    }
+
+    // Run the kernels
+    if (!gSkipCorrectnessTesting)
+    {
+        error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+        if (error) goto exit;
+
+        if (gWimpyMode)
+            vlog("Wimp pass");
+        else
+            vlog("passed");
+    }
+
+    vlog("\n");
+
+exit:
+    // Release
+    for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+    {
+        for (auto &kernel : test_info.k[i])
+        {
+            clReleaseKernel(kernel);
+        }
+    }
+
+    return error;
+}