diff options
Diffstat (limited to 'test_conformance/math_brute_force/macro_binary_double.cpp')
-rw-r--r-- | test_conformance/math_brute_force/macro_binary_double.cpp | 394 |
1 files changed, 184 insertions, 210 deletions
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp index d09915f6..412f210b 100644 --- a/test_conformance/math_brute_force/macro_binary_double.cpp +++ b/test_conformance/math_brute_force/macro_binary_double.cpp @@ -14,14 +14,18 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" +#include <cinttypes> #include <cstring> -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +namespace { + +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n", "__kernel void math_kernel", @@ -107,54 +111,55 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - cl_kernel **kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; - -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread - MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; - -typedef struct TestInfo + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + + MTdataHolder d; + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; +}; + +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Programs for various vector sizes. + Programs programs; + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. cl_uint scale; // stride between individual test values int ftz; // non-zero if running in flush to zero mode - -} TestInfo; + bool relaxedMode; // True if test is running in relaxed mode, false + // otherwise. +}; // A table of more difficult cases to get right -static const double specialValues[] = { +const double specialValues[] = { -NAN, -INFINITY, -DBL_MAX, @@ -264,182 +269,19 @@ static const double specialValues[] = { +0.0, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - TestInfo test_info; - cl_int error; - - logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_double)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ftz = f->ftz || gForceFTZ; - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (size_t i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_double), - test_info.subBufferSize * sizeof(cl_double) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) -{ - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_double); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); dptr dfunc = job->f->dfunc; int ftz = job->ftz; + bool relaxedMode = job->relaxedMode; MTdata d = tinfo->d; cl_int error; const char *name = job->f->name; @@ -538,7 +380,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } @@ -613,7 +456,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if (gMinVectorSizeIndex == 0 && t[j] != q[j]) { // If we aren't getting the correctly rounded result - if (ftz) + if (ftz || relaxedMode) { if (IsDoubleSubnormal(s[j])) { @@ -645,8 +488,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong err = t[j] - q[j]; if (q[j] > t[j]) err = q[j] - t[j]; - vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld " - "vs. %lld (index: %d)\n", + vlog_error("\nERROR: %s: %" PRId64 + " ulp error at {%.13la, %.13la}: *%" PRId64 " " + "vs. %" PRId64 " (index: %zu)\n", name, err, ((double *)s)[j], ((double *)s2)[j], t[j], q[j], j); error = -1; @@ -654,13 +498,14 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } - for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++) + for (auto k = std::max(1U, gMinVectorSizeIndex); + k < gMaxVectorSizeIndex; k++) { q = (cl_long *)out[k]; // If we aren't getting the correctly rounded result if (-t[j] != q[j]) { - if (ftz) + if (ftz || relaxedMode) { if (IsDoubleSubnormal(s[j])) { @@ -692,8 +537,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) cl_ulong err = -t[j] - q[j]; if (q[j] > -t[j]) err = q[j] + t[j]; - vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, " - "%.13la}: *%lld vs. %lld (index: %d)\n", + vlog_error("\nERROR: %sD%s: %" PRId64 " ulp error at {%.13la, " + "%.13la}: *%" PRId64 " vs. %" PRId64 + " (index: %zu)\n", name, sizeNames[k], err, ((double *)s)[j], ((double *)s2)[j], -t[j], q[j], j); error = -1; @@ -735,3 +581,131 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) exit: return error; } + +} // anonymous namespace + +int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + + logFunctionInfo(f->name, sizeof(cl_double), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_double)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ftz = f->ftz || gForceFTZ; + test_info.relaxedMode = relaxedMode; + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_double), + test_info.subBufferSize * sizeof(cl_double) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + + return error; +} |