diff options
Diffstat (limited to 'test_conformance/math_brute_force/binary_float.cpp')
-rw-r--r-- | test_conformance/math_brute_force/binary_float.cpp | 425 |
1 files changed, 195 insertions, 230 deletions
diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp index 32caafa3..fe1491d7 100644 --- a/test_conformance/math_brute_force/binary_float.cpp +++ b/test_conformance/math_brute_force/binary_float.cpp @@ -14,16 +14,19 @@ // limitations under the License. // +#include "common.h" #include "function_list.h" #include "test_functions.h" #include "utility.h" #include <cstring> +namespace { + const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126); -static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, - cl_kernel *k, cl_program *p, bool relaxedMode) +int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, + cl_kernel *k, cl_program *p, bool relaxedMode) { const char *c[] = { "__kernel void math_kernel", sizeNames[vectorSize], @@ -107,49 +110,49 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count, relaxedMode); } -typedef struct BuildKernelInfo -{ - cl_uint offset; // the first vector size to build - cl_uint kernel_count; - cl_kernel **kernels; - cl_program *programs; - const char *nameInCode; - bool relaxedMode; // Whether to build with -cl-fast-relaxed-math. -} BuildKernelInfo; - -static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) +cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p) { BuildKernelInfo *info = (BuildKernelInfo *)p; - cl_uint i = info->offset + job_id; - return BuildKernel(info->nameInCode, i, info->kernel_count, - info->kernels[i], info->programs + i, info->relaxedMode); + cl_uint vectorSize = gMinVectorSizeIndex + job_id; + return BuildKernel(info->nameInCode, vectorSize, info->threadCount, + info->kernels[vectorSize].data(), + &(info->programs[vectorSize]), info->relaxedMode); } // Thread specific data for a worker thread -typedef struct ThreadInfo +struct ThreadInfo { - cl_mem inBuf; // input buffer for the thread - cl_mem inBuf2; // input buffer for the thread - cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread + // Input and output buffers for the thread + clMemWrapper inBuf; + clMemWrapper inBuf2; + Buffers outBuf; + float maxError; // max error value. Init to 0. double maxErrorValue; // position of the max error value (param 1). Init to 0. double maxErrorValue2; // position of the max error value (param 2). Init // to 0. - MTdata d; - cl_command_queue tQueue; // per thread command queue to improve performance -} ThreadInfo; + MTdataHolder d; + + // Per thread command queue to improve performance + clCommandQueueWrapper tQueue; +}; -typedef struct TestInfo +struct TestInfo { size_t subBufferSize; // Size of the sub-buffer in elements const Func *f; // A pointer to the function info - cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes - cl_kernel - *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each - // worker thread: k[vector_size][thread_id] - ThreadInfo * - tinfo; // An array of thread specific information for each worker thread + + // Programs for various vector sizes. + Programs programs; + + // Thread-specific kernels for each vector size: + // k[vector_size][thread_id] + KernelMatrix k; + + // Array of thread specific information + std::vector<ThreadInfo> tinfo; + cl_uint threadCount; // Number of worker threads cl_uint jobCount; // Number of jobs cl_uint step; // step between each chunk and the next. @@ -162,10 +165,10 @@ typedef struct TestInfo int isNextafter; bool relaxedMode; // True if test is running in relaxed mode, false // otherwise. -} TestInfo; +}; // A table of more difficult cases to get right -static const float specialValues[] = { +const float specialValues[] = { -NAN, -INFINITY, -FLT_MAX, @@ -267,209 +270,23 @@ static const float specialValues[] = { +0.0f, }; -static const size_t specialValuesCount = +constexpr size_t specialValuesCount = sizeof(specialValues) / sizeof(specialValues[0]); -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data); - -int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) -{ - TestInfo test_info; - cl_int error; - float maxError = 0.0f; - double maxErrorVal = 0.0; - double maxErrorVal2 = 0.0; - - logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); - - // Init test_info - memset(&test_info, 0, sizeof(test_info)); - test_info.threadCount = GetThreadCount(); - test_info.subBufferSize = BUFFER_SIZE - / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); - test_info.scale = getTestScale(sizeof(cl_float)); - - test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; - if (test_info.step / test_info.subBufferSize != test_info.scale) - { - // there was overflow - test_info.jobCount = 1; - } - else - { - test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); - } - - test_info.f = f; - test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; - test_info.ftz = - f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); - test_info.relaxedMode = relaxedMode; - test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); - test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; - test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); - - // cl_kernels aren't thread safe, so we make one for each vector size for - // every thread - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - size_t array_size = test_info.threadCount * sizeof(cl_kernel); - test_info.k[i] = (cl_kernel *)malloc(array_size); - if (NULL == test_info.k[i]) - { - vlog_error("Error: Unable to allocate storage for kernels!\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.k[i], 0, array_size); - } - test_info.tinfo = - (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo)); - if (NULL == test_info.tinfo) - { - vlog_error( - "Error: Unable to allocate storage for thread specific data.\n"); - error = CL_OUT_OF_HOST_MEMORY; - goto exit; - } - memset(test_info.tinfo, 0, - test_info.threadCount * sizeof(*test_info.tinfo)); - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - cl_buffer_region region = { - i * test_info.subBufferSize * sizeof(cl_float), - test_info.subBufferSize * sizeof(cl_float) - }; - test_info.tinfo[i].inBuf = - clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - test_info.tinfo[i].inBuf2 = - clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, - CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); - if (error || NULL == test_info.tinfo[i].inBuf2) - { - vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " - "region {%zd, %zd}\n", - region.origin, region.size); - goto exit; - } - - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - { - test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( - gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, - ®ion, &error); - if (error || NULL == test_info.tinfo[i].outBuf[j]) - { - vlog_error("Error: Unable to create sub-buffer of " - "gOutBuffer[%d] for region {%zd, %zd}\n", - (int)j, region.origin, region.size); - goto exit; - } - } - test_info.tinfo[i].tQueue = - clCreateCommandQueue(gContext, gDevice, 0, &error); - if (NULL == test_info.tinfo[i].tQueue || error) - { - vlog_error("clCreateCommandQueue failed. (%d)\n", error); - goto exit; - } - - test_info.tinfo[i].d = init_genrand(genrand_int32(d)); - } - - // Init the kernels - { - BuildKernelInfo build_info = { - gMinVectorSizeIndex, test_info.threadCount, test_info.k, - test_info.programs, f->nameInCode, relaxedMode - }; - if ((error = ThreadPool_Do(BuildKernelFn, - gMaxVectorSizeIndex - gMinVectorSizeIndex, - &build_info))) - goto exit; - } - - // Run the kernels - if (!gSkipCorrectnessTesting) - { - error = ThreadPool_Do(Test, test_info.jobCount, &test_info); - - // Accumulate the arithmetic errors - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - if (test_info.tinfo[i].maxError > maxError) - { - maxError = test_info.tinfo[i].maxError; - maxErrorVal = test_info.tinfo[i].maxErrorValue; - maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; - } - } - - if (error) goto exit; - - if (gWimpyMode) - vlog("Wimp pass"); - else - vlog("passed"); - - vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); - } - - vlog("\n"); - -exit: - // Release - for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) - { - clReleaseProgram(test_info.programs[i]); - if (test_info.k[i]) - { - for (cl_uint j = 0; j < test_info.threadCount; j++) - clReleaseKernel(test_info.k[i][j]); - - free(test_info.k[i]); - } - } - if (test_info.tinfo) - { - for (cl_uint i = 0; i < test_info.threadCount; i++) - { - free_mtdata(test_info.tinfo[i].d); - clReleaseMemObject(test_info.tinfo[i].inBuf); - clReleaseMemObject(test_info.tinfo[i].inBuf2); - for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) - clReleaseMemObject(test_info.tinfo[i].outBuf[j]); - clReleaseCommandQueue(test_info.tinfo[i].tQueue); - } - - free(test_info.tinfo); - } - - return error; -} - -static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) +cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { - const TestInfo *job = (const TestInfo *)data; + TestInfo *job = (TestInfo *)data; size_t buffer_elements = job->subBufferSize; size_t buffer_size = buffer_elements * sizeof(cl_float); cl_uint base = job_id * (cl_uint)job->step; - ThreadInfo *tinfo = job->tinfo + thread_id; + ThreadInfo *tinfo = &(job->tinfo[thread_id]); fptr func = job->f->func; int ftz = job->ftz; bool relaxedMode = job->relaxedMode; float ulps = getAllowedUlpError(job->f, relaxedMode); MTdata d = tinfo->d; cl_int error; - cl_uchar *overflow = (cl_uchar *)malloc(buffer_size); + std::vector<bool> overflow(buffer_elements, false); const char *name = job->f->name; int isFDim = job->isFDim; int skipNanInf = job->skipNanInf; @@ -583,7 +400,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j], out[j], 0, NULL, NULL))) { - vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error); + vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n", + error); goto exit; } @@ -631,7 +449,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) vlog_error("Error: clFinish failed! err: %d\n", error); goto exit; } - free(overflow); return CL_SUCCESS; } @@ -641,7 +458,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { // Calculate the correctly rounded reference result memset(&oldMode, 0, sizeof(oldMode)); - if (ftz) ForceFTZ(&oldMode); + if (ftz || relaxedMode) ForceFTZ(&oldMode); // Set the rounding mode to match the device if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat); @@ -726,7 +543,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) float err = Ulp_Error(test, correct); int fail = !(fabsf(err) <= ulps); - if (fail && ftz) + if (fail && (ftz || relaxedMode)) { // retry per section 6.5.3.2 if (IsFloatResultSubnormal(correct, ulps)) @@ -938,7 +755,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { vlog_error( "\nERROR: %s%s: %f ulp error at {%a (0x%x), %a " - "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n", + "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %zu\n", name, sizeNames[k], err, s[j], ((cl_uint *)s)[j], s2[j], ((cl_uint *)s2)[j], r[j], test, ((cl_uint *)&test)[0], j); @@ -970,7 +787,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) { if (gVerboseBruteForce) { - vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f " + vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f " "ThreadCount:%2u\n", base, job->step, job->scale, buffer_elements, job->ulps, job->threadCount); @@ -983,6 +800,154 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data) } exit: - if (overflow) free(overflow); + return error; +} + +} // anonymous namespace + +int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode) +{ + TestInfo test_info{}; + cl_int error; + float maxError = 0.0f; + double maxErrorVal = 0.0; + double maxErrorVal2 = 0.0; + + logFunctionInfo(f->name, sizeof(cl_float), relaxedMode); + + // Init test_info + test_info.threadCount = GetThreadCount(); + test_info.subBufferSize = BUFFER_SIZE + / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount)); + test_info.scale = getTestScale(sizeof(cl_float)); + + test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale; + if (test_info.step / test_info.subBufferSize != test_info.scale) + { + // there was overflow + test_info.jobCount = 1; + } + else + { + test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step); + } + + test_info.f = f; + test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps; + test_info.ftz = + f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities); + test_info.relaxedMode = relaxedMode; + test_info.isFDim = 0 == strcmp("fdim", f->nameInCode); + test_info.skipNanInf = test_info.isFDim && !gInfNanSupport; + test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode); + + // cl_kernels aren't thread safe, so we make one for each vector size for + // every thread + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + test_info.k[i].resize(test_info.threadCount, nullptr); + } + + test_info.tinfo.resize(test_info.threadCount); + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + cl_buffer_region region = { + i * test_info.subBufferSize * sizeof(cl_float), + test_info.subBufferSize * sizeof(cl_float) + }; + test_info.tinfo[i].inBuf = + clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + test_info.tinfo[i].inBuf2 = + clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY, + CL_BUFFER_CREATE_TYPE_REGION, ®ion, &error); + if (error || NULL == test_info.tinfo[i].inBuf2) + { + vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for " + "region {%zd, %zd}\n", + region.origin, region.size); + goto exit; + } + + for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++) + { + test_info.tinfo[i].outBuf[j] = clCreateSubBuffer( + gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION, + ®ion, &error); + if (error || NULL == test_info.tinfo[i].outBuf[j]) + { + vlog_error("Error: Unable to create sub-buffer of " + "gOutBuffer[%d] for region {%zd, %zd}\n", + (int)j, region.origin, region.size); + goto exit; + } + } + test_info.tinfo[i].tQueue = + clCreateCommandQueue(gContext, gDevice, 0, &error); + if (NULL == test_info.tinfo[i].tQueue || error) + { + vlog_error("clCreateCommandQueue failed. (%d)\n", error); + goto exit; + } + + test_info.tinfo[i].d = MTdataHolder(genrand_int32(d)); + } + + // Init the kernels + { + BuildKernelInfo build_info{ test_info.threadCount, test_info.k, + test_info.programs, f->nameInCode, + relaxedMode }; + if ((error = ThreadPool_Do(BuildKernelFn, + gMaxVectorSizeIndex - gMinVectorSizeIndex, + &build_info))) + goto exit; + } + + // Run the kernels + if (!gSkipCorrectnessTesting) + { + error = ThreadPool_Do(Test, test_info.jobCount, &test_info); + + // Accumulate the arithmetic errors + for (cl_uint i = 0; i < test_info.threadCount; i++) + { + if (test_info.tinfo[i].maxError > maxError) + { + maxError = test_info.tinfo[i].maxError; + maxErrorVal = test_info.tinfo[i].maxErrorValue; + maxErrorVal2 = test_info.tinfo[i].maxErrorValue2; + } + } + + if (error) goto exit; + + if (gWimpyMode) + vlog("Wimp pass"); + else + vlog("passed"); + + vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2); + } + + vlog("\n"); + +exit: + // Release + for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++) + { + for (auto &kernel : test_info.k[i]) + { + clReleaseKernel(kernel); + } + } + return error; } |