aboutsummaryrefslogtreecommitdiff
path: root/test_conformance/math_brute_force/unary_double.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'test_conformance/math_brute_force/unary_double.cpp')
-rw-r--r--test_conformance/math_brute_force/unary_double.cpp371
1 files changed, 171 insertions, 200 deletions
diff --git a/test_conformance/math_brute_force/unary_double.cpp b/test_conformance/math_brute_force/unary_double.cpp
index f6fa3264..177cfe5b 100644
--- a/test_conformance/math_brute_force/unary_double.cpp
+++ b/test_conformance/math_brute_force/unary_double.cpp
@@ -14,14 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -101,44 +105,44 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ Buffers outBuf;
+
float maxError; // max error value. Init to 0.
double maxErrorValue; // position of the max error value. Init to 0.
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-typedef struct TestInfo
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
+
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
@@ -151,185 +155,21 @@ typedef struct TestInfo
float half_sin_cos_tan_limit;
bool relaxedMode; // True if test is running in relaxed mode, false
// otherwise.
-} TestInfo;
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
+};
-int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- TestInfo test_info;
- cl_int error;
- float maxError = 0.0f;
- double maxErrorVal = 0.0;
-
- logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_double));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ulps = f->double_ulps;
- test_info.ftz = f->ftz || gForceFTZ;
- test_info.relaxedMode = relaxedMode;
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_double),
- test_info.subBufferSize * sizeof(cl_double)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- // Accumulate the arithmetic errors
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- if (test_info.tinfo[i].maxError > maxError)
- {
- maxError = test_info.tinfo[i].maxError;
- maxErrorVal = test_info.tinfo[i].maxErrorValue;
- }
- }
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
-
- vlog("\t%8.2f @ %a", maxError, maxErrorVal);
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint scale = job->scale;
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
float ulps = job->ulps;
dptr func = job->f->dfunc;
cl_int error;
int ftz = job->ftz;
+ bool relaxedMode = job->relaxedMode;
Force64BitFPUPrecision();
@@ -385,7 +225,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
return error;
}
@@ -463,7 +304,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (fail)
{
- if (ftz)
+ if (ftz || relaxedMode)
{
// retry per section 6.5.3.2
if (IsDoubleResultSubnormal(correct, ulps))
@@ -505,7 +346,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (fail)
{
vlog_error("\nERROR: %s%s: %f ulp error at %.13la "
- "(0x%16.16llx): *%.13la vs. %.13la\n",
+ "(0x%16.16" PRIx64 "): *%.13la vs. %.13la\n",
job->f->name, sizeNames[k], err,
((cl_double *)gIn)[j], ((cl_ulong *)gIn)[j],
((cl_double *)gOut_Ref)[j], test);
@@ -547,3 +388,133 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
return CL_SUCCESS;
}
+
+} // anonymous namespace
+
+int TestFunc_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+ float maxError = 0.0f;
+ double maxErrorVal = 0.0;
+
+ logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_double));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ulps = f->double_ulps;
+ test_info.ftz = f->ftz || gForceFTZ;
+ test_info.relaxedMode = relaxedMode;
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_double),
+ test_info.subBufferSize * sizeof(cl_double)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ // Accumulate the arithmetic errors
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ if (test_info.tinfo[i].maxError > maxError)
+ {
+ maxError = test_info.tinfo[i].maxError;
+ maxErrorVal = test_info.tinfo[i].maxErrorValue;
+ }
+ }
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+
+ vlog("\t%8.2f @ %a", maxError, maxErrorVal);
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
+ return error;
+}