aboutsummaryrefslogtreecommitdiff
path: root/test_conformance/math_brute_force/binary_float.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'test_conformance/math_brute_force/binary_float.cpp')
-rw-r--r--test_conformance/math_brute_force/binary_float.cpp425
1 files changed, 195 insertions, 230 deletions
diff --git a/test_conformance/math_brute_force/binary_float.cpp b/test_conformance/math_brute_force/binary_float.cpp
index 32caafa3..fe1491d7 100644
--- a/test_conformance/math_brute_force/binary_float.cpp
+++ b/test_conformance/math_brute_force/binary_float.cpp
@@ -14,16 +14,19 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
#include <cstring>
+namespace {
+
const float twoToMinus126 = MAKE_HEX_FLOAT(0x1p-126f, 1, -126);
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "__kernel void math_kernel",
sizeNames[vectorSize],
@@ -107,49 +110,49 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem inBuf2; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ clMemWrapper inBuf2;
+ Buffers outBuf;
+
float maxError; // max error value. Init to 0.
double
maxErrorValue; // position of the max error value (param 1). Init to 0.
double maxErrorValue2; // position of the max error value (param 2). Init
// to 0.
- MTdata d;
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
+ MTdataHolder d;
+
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
-typedef struct TestInfo
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
@@ -162,10 +165,10 @@ typedef struct TestInfo
int isNextafter;
bool relaxedMode; // True if test is running in relaxed mode, false
// otherwise.
-} TestInfo;
+};
// A table of more difficult cases to get right
-static const float specialValues[] = {
+const float specialValues[] = {
-NAN,
-INFINITY,
-FLT_MAX,
@@ -267,209 +270,23 @@ static const float specialValues[] = {
+0.0f,
};
-static const size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
-{
- TestInfo test_info;
- cl_int error;
- float maxError = 0.0f;
- double maxErrorVal = 0.0;
- double maxErrorVal2 = 0.0;
-
- logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_float));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
- test_info.ftz =
- f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
- test_info.relaxedMode = relaxedMode;
- test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
- test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
- test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_float),
- test_info.subBufferSize * sizeof(cl_float)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
- test_info.tinfo[i].inBuf2 =
- clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf2)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
-
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- // Accumulate the arithmetic errors
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- if (test_info.tinfo[i].maxError > maxError)
- {
- maxError = test_info.tinfo[i].maxError;
- maxErrorVal = test_info.tinfo[i].maxErrorValue;
- maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
- }
- }
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
-
- vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- free_mtdata(test_info.tinfo[i].d);
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- clReleaseMemObject(test_info.tinfo[i].inBuf2);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_float);
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
fptr func = job->f->func;
int ftz = job->ftz;
bool relaxedMode = job->relaxedMode;
float ulps = getAllowedUlpError(job->f, relaxedMode);
MTdata d = tinfo->d;
cl_int error;
- cl_uchar *overflow = (cl_uchar *)malloc(buffer_size);
+ std::vector<bool> overflow(buffer_elements, false);
const char *name = job->f->name;
int isFDim = job->isFDim;
int skipNanInf = job->skipNanInf;
@@ -583,7 +400,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
goto exit;
}
@@ -631,7 +449,6 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
vlog_error("Error: clFinish failed! err: %d\n", error);
goto exit;
}
- free(overflow);
return CL_SUCCESS;
}
@@ -641,7 +458,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
// Calculate the correctly rounded reference result
memset(&oldMode, 0, sizeof(oldMode));
- if (ftz) ForceFTZ(&oldMode);
+ if (ftz || relaxedMode) ForceFTZ(&oldMode);
// Set the rounding mode to match the device
if (gIsInRTZMode) oldRoundMode = set_round(kRoundTowardZero, kfloat);
@@ -726,7 +543,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
float err = Ulp_Error(test, correct);
int fail = !(fabsf(err) <= ulps);
- if (fail && ftz)
+ if (fail && (ftz || relaxedMode))
{
// retry per section 6.5.3.2
if (IsFloatResultSubnormal(correct, ulps))
@@ -938,7 +755,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
vlog_error(
"\nERROR: %s%s: %f ulp error at {%a (0x%x), %a "
- "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %d\n",
+ "(0x%x)}: *%a vs. %a (0x%8.8x) at index: %zu\n",
name, sizeNames[k], err, s[j], ((cl_uint *)s)[j],
s2[j], ((cl_uint *)s2)[j], r[j], test,
((cl_uint *)&test)[0], j);
@@ -970,7 +787,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
if (gVerboseBruteForce)
{
- vlog("base:%14u step:%10u scale:%10zu buf_elements:%10u ulps:%5.3f "
+ vlog("base:%14u step:%10u scale:%10u buf_elements:%10zu ulps:%5.3f "
"ThreadCount:%2u\n",
base, job->step, job->scale, buffer_elements, job->ulps,
job->threadCount);
@@ -983,6 +800,154 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
}
exit:
- if (overflow) free(overflow);
+ return error;
+}
+
+} // anonymous namespace
+
+int TestFunc_Float_Float_Float(const Func *f, MTdata d, bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+ float maxError = 0.0f;
+ double maxErrorVal = 0.0;
+ double maxErrorVal2 = 0.0;
+
+ logFunctionInfo(f->name, sizeof(cl_float), relaxedMode);
+
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_float) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_float));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ulps = gIsEmbedded ? f->float_embedded_ulps : f->float_ulps;
+ test_info.ftz =
+ f->ftz || gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities);
+ test_info.relaxedMode = relaxedMode;
+ test_info.isFDim = 0 == strcmp("fdim", f->nameInCode);
+ test_info.skipNanInf = test_info.isFDim && !gInfNanSupport;
+ test_info.isNextafter = 0 == strcmp("nextafter", f->nameInCode);
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_float),
+ test_info.subBufferSize * sizeof(cl_float)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+ test_info.tinfo[i].inBuf2 =
+ clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf2)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ // Accumulate the arithmetic errors
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ if (test_info.tinfo[i].maxError > maxError)
+ {
+ maxError = test_info.tinfo[i].maxError;
+ maxErrorVal = test_info.tinfo[i].maxErrorValue;
+ maxErrorVal2 = test_info.tinfo[i].maxErrorValue2;
+ }
+ }
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+
+ vlog("\t%8.2f @ {%a, %a}", maxError, maxErrorVal, maxErrorVal2);
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
return error;
}