aboutsummaryrefslogtreecommitdiff
path: root/test_conformance/math_brute_force/macro_binary_double.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'test_conformance/math_brute_force/macro_binary_double.cpp')
-rw-r--r--test_conformance/math_brute_force/macro_binary_double.cpp394
1 files changed, 184 insertions, 210 deletions
diff --git a/test_conformance/math_brute_force/macro_binary_double.cpp b/test_conformance/math_brute_force/macro_binary_double.cpp
index d09915f6..412f210b 100644
--- a/test_conformance/math_brute_force/macro_binary_double.cpp
+++ b/test_conformance/math_brute_force/macro_binary_double.cpp
@@ -14,14 +14,18 @@
// limitations under the License.
//
+#include "common.h"
#include "function_list.h"
#include "test_functions.h"
#include "utility.h"
+#include <cinttypes>
#include <cstring>
-static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
- cl_kernel *k, cl_program *p, bool relaxedMode)
+namespace {
+
+int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
+ cl_kernel *k, cl_program *p, bool relaxedMode)
{
const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
"__kernel void math_kernel",
@@ -107,54 +111,55 @@ static int BuildKernel(const char *name, int vectorSize, cl_uint kernel_count,
relaxedMode);
}
-typedef struct BuildKernelInfo
-{
- cl_uint offset; // the first vector size to build
- cl_uint kernel_count;
- cl_kernel **kernels;
- cl_program *programs;
- const char *nameInCode;
- bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
-} BuildKernelInfo;
-
-static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
+cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
{
BuildKernelInfo *info = (BuildKernelInfo *)p;
- cl_uint i = info->offset + job_id;
- return BuildKernel(info->nameInCode, i, info->kernel_count,
- info->kernels[i], info->programs + i, info->relaxedMode);
+ cl_uint vectorSize = gMinVectorSizeIndex + job_id;
+ return BuildKernel(info->nameInCode, vectorSize, info->threadCount,
+ info->kernels[vectorSize].data(),
+ &(info->programs[vectorSize]), info->relaxedMode);
}
// Thread specific data for a worker thread
-typedef struct ThreadInfo
+struct ThreadInfo
{
- cl_mem inBuf; // input buffer for the thread
- cl_mem inBuf2; // input buffer for the thread
- cl_mem outBuf[VECTOR_SIZE_COUNT]; // output buffers for the thread
- MTdata d;
- cl_command_queue tQueue; // per thread command queue to improve performance
-} ThreadInfo;
-
-typedef struct TestInfo
+ // Input and output buffers for the thread
+ clMemWrapper inBuf;
+ clMemWrapper inBuf2;
+ Buffers outBuf;
+
+ MTdataHolder d;
+
+ // Per thread command queue to improve performance
+ clCommandQueueWrapper tQueue;
+};
+
+struct TestInfo
{
size_t subBufferSize; // Size of the sub-buffer in elements
const Func *f; // A pointer to the function info
- cl_program programs[VECTOR_SIZE_COUNT]; // programs for various vector sizes
- cl_kernel
- *k[VECTOR_SIZE_COUNT]; // arrays of thread-specific kernels for each
- // worker thread: k[vector_size][thread_id]
- ThreadInfo *
- tinfo; // An array of thread specific information for each worker thread
+
+ // Programs for various vector sizes.
+ Programs programs;
+
+ // Thread-specific kernels for each vector size:
+ // k[vector_size][thread_id]
+ KernelMatrix k;
+
+ // Array of thread specific information
+ std::vector<ThreadInfo> tinfo;
+
cl_uint threadCount; // Number of worker threads
cl_uint jobCount; // Number of jobs
cl_uint step; // step between each chunk and the next.
cl_uint scale; // stride between individual test values
int ftz; // non-zero if running in flush to zero mode
-
-} TestInfo;
+ bool relaxedMode; // True if test is running in relaxed mode, false
+ // otherwise.
+};
// A table of more difficult cases to get right
-static const double specialValues[] = {
+const double specialValues[] = {
-NAN,
-INFINITY,
-DBL_MAX,
@@ -264,182 +269,19 @@ static const double specialValues[] = {
+0.0,
};
-static const size_t specialValuesCount =
+constexpr size_t specialValuesCount =
sizeof(specialValues) / sizeof(specialValues[0]);
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data);
-
-int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
{
- TestInfo test_info;
- cl_int error;
-
- logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
-
- // Init test_info
- memset(&test_info, 0, sizeof(test_info));
- test_info.threadCount = GetThreadCount();
- test_info.subBufferSize = BUFFER_SIZE
- / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
- test_info.scale = getTestScale(sizeof(cl_double));
-
- test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
- if (test_info.step / test_info.subBufferSize != test_info.scale)
- {
- // there was overflow
- test_info.jobCount = 1;
- }
- else
- {
- test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
- }
-
- test_info.f = f;
- test_info.ftz = f->ftz || gForceFTZ;
-
- // cl_kernels aren't thread safe, so we make one for each vector size for
- // every thread
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- size_t array_size = test_info.threadCount * sizeof(cl_kernel);
- test_info.k[i] = (cl_kernel *)malloc(array_size);
- if (NULL == test_info.k[i])
- {
- vlog_error("Error: Unable to allocate storage for kernels!\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.k[i], 0, array_size);
- }
- test_info.tinfo =
- (ThreadInfo *)malloc(test_info.threadCount * sizeof(*test_info.tinfo));
- if (NULL == test_info.tinfo)
- {
- vlog_error(
- "Error: Unable to allocate storage for thread specific data.\n");
- error = CL_OUT_OF_HOST_MEMORY;
- goto exit;
- }
- memset(test_info.tinfo, 0,
- test_info.threadCount * sizeof(*test_info.tinfo));
- for (size_t i = 0; i < test_info.threadCount; i++)
- {
- cl_buffer_region region = {
- i * test_info.subBufferSize * sizeof(cl_double),
- test_info.subBufferSize * sizeof(cl_double)
- };
- test_info.tinfo[i].inBuf =
- clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
- test_info.tinfo[i].inBuf2 =
- clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
- CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
- if (error || NULL == test_info.tinfo[i].inBuf2)
- {
- vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
- "region {%zd, %zd}\n",
- region.origin, region.size);
- goto exit;
- }
-
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- {
- test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
- gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
- &region, &error);
- if (error || NULL == test_info.tinfo[i].outBuf[j])
- {
- vlog_error("Error: Unable to create sub-buffer of "
- "gOutBuffer[%d] for region {%zd, %zd}\n",
- (int)j, region.origin, region.size);
- goto exit;
- }
- }
- test_info.tinfo[i].tQueue =
- clCreateCommandQueue(gContext, gDevice, 0, &error);
- if (NULL == test_info.tinfo[i].tQueue || error)
- {
- vlog_error("clCreateCommandQueue failed. (%d)\n", error);
- goto exit;
- }
-
- test_info.tinfo[i].d = init_genrand(genrand_int32(d));
- }
-
- // Init the kernels
- {
- BuildKernelInfo build_info = {
- gMinVectorSizeIndex, test_info.threadCount, test_info.k,
- test_info.programs, f->nameInCode, relaxedMode
- };
- if ((error = ThreadPool_Do(BuildKernelFn,
- gMaxVectorSizeIndex - gMinVectorSizeIndex,
- &build_info)))
- goto exit;
- }
-
- // Run the kernels
- if (!gSkipCorrectnessTesting)
- {
- error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
-
- if (error) goto exit;
-
- if (gWimpyMode)
- vlog("Wimp pass");
- else
- vlog("passed");
- }
-
- vlog("\n");
-
-exit:
- // Release
- for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
- {
- clReleaseProgram(test_info.programs[i]);
- if (test_info.k[i])
- {
- for (cl_uint j = 0; j < test_info.threadCount; j++)
- clReleaseKernel(test_info.k[i][j]);
-
- free(test_info.k[i]);
- }
- }
- if (test_info.tinfo)
- {
- for (cl_uint i = 0; i < test_info.threadCount; i++)
- {
- free_mtdata(test_info.tinfo[i].d);
- clReleaseMemObject(test_info.tinfo[i].inBuf);
- clReleaseMemObject(test_info.tinfo[i].inBuf2);
- for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
- clReleaseMemObject(test_info.tinfo[i].outBuf[j]);
- clReleaseCommandQueue(test_info.tinfo[i].tQueue);
- }
-
- free(test_info.tinfo);
- }
-
- return error;
-}
-
-static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
-{
- const TestInfo *job = (const TestInfo *)data;
+ TestInfo *job = (TestInfo *)data;
size_t buffer_elements = job->subBufferSize;
size_t buffer_size = buffer_elements * sizeof(cl_double);
cl_uint base = job_id * (cl_uint)job->step;
- ThreadInfo *tinfo = job->tinfo + thread_id;
+ ThreadInfo *tinfo = &(job->tinfo[thread_id]);
dptr dfunc = job->f->dfunc;
int ftz = job->ftz;
+ bool relaxedMode = job->relaxedMode;
MTdata d = tinfo->d;
cl_int error;
const char *name = job->f->name;
@@ -538,7 +380,8 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if ((error = clEnqueueUnmapMemObject(tinfo->tQueue, tinfo->outBuf[j],
out[j], 0, NULL, NULL)))
{
- vlog_error("Error: clEnqueueMapBuffer failed! err: %d\n", error);
+ vlog_error("Error: clEnqueueUnmapMemObject failed! err: %d\n",
+ error);
goto exit;
}
@@ -613,7 +456,7 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
if (gMinVectorSizeIndex == 0 && t[j] != q[j])
{
// If we aren't getting the correctly rounded result
- if (ftz)
+ if (ftz || relaxedMode)
{
if (IsDoubleSubnormal(s[j]))
{
@@ -645,8 +488,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_ulong err = t[j] - q[j];
if (q[j] > t[j]) err = q[j] - t[j];
- vlog_error("\nERROR: %s: %lld ulp error at {%.13la, %.13la}: *%lld "
- "vs. %lld (index: %d)\n",
+ vlog_error("\nERROR: %s: %" PRId64
+ " ulp error at {%.13la, %.13la}: *%" PRId64 " "
+ "vs. %" PRId64 " (index: %zu)\n",
name, err, ((double *)s)[j], ((double *)s2)[j], t[j],
q[j], j);
error = -1;
@@ -654,13 +498,14 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
}
- for (auto k = MAX(1, gMinVectorSizeIndex); k < gMaxVectorSizeIndex; k++)
+ for (auto k = std::max(1U, gMinVectorSizeIndex);
+ k < gMaxVectorSizeIndex; k++)
{
q = (cl_long *)out[k];
// If we aren't getting the correctly rounded result
if (-t[j] != q[j])
{
- if (ftz)
+ if (ftz || relaxedMode)
{
if (IsDoubleSubnormal(s[j]))
{
@@ -692,8 +537,9 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
cl_ulong err = -t[j] - q[j];
if (q[j] > -t[j]) err = q[j] + t[j];
- vlog_error("\nERROR: %sD%s: %lld ulp error at {%.13la, "
- "%.13la}: *%lld vs. %lld (index: %d)\n",
+ vlog_error("\nERROR: %sD%s: %" PRId64 " ulp error at {%.13la, "
+ "%.13la}: *%" PRId64 " vs. %" PRId64
+ " (index: %zu)\n",
name, sizeNames[k], err, ((double *)s)[j],
((double *)s2)[j], -t[j], q[j], j);
error = -1;
@@ -735,3 +581,131 @@ static cl_int Test(cl_uint job_id, cl_uint thread_id, void *data)
exit:
return error;
}
+
+} // anonymous namespace
+
+int TestMacro_Int_Double_Double(const Func *f, MTdata d, bool relaxedMode)
+{
+ TestInfo test_info{};
+ cl_int error;
+
+ logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
+
+ // Init test_info
+ test_info.threadCount = GetThreadCount();
+ test_info.subBufferSize = BUFFER_SIZE
+ / (sizeof(cl_double) * RoundUpToNextPowerOfTwo(test_info.threadCount));
+ test_info.scale = getTestScale(sizeof(cl_double));
+
+ test_info.step = (cl_uint)test_info.subBufferSize * test_info.scale;
+ if (test_info.step / test_info.subBufferSize != test_info.scale)
+ {
+ // there was overflow
+ test_info.jobCount = 1;
+ }
+ else
+ {
+ test_info.jobCount = (cl_uint)((1ULL << 32) / test_info.step);
+ }
+
+ test_info.f = f;
+ test_info.ftz = f->ftz || gForceFTZ;
+ test_info.relaxedMode = relaxedMode;
+
+ // cl_kernels aren't thread safe, so we make one for each vector size for
+ // every thread
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ test_info.k[i].resize(test_info.threadCount, nullptr);
+ }
+
+ test_info.tinfo.resize(test_info.threadCount);
+ for (cl_uint i = 0; i < test_info.threadCount; i++)
+ {
+ cl_buffer_region region = {
+ i * test_info.subBufferSize * sizeof(cl_double),
+ test_info.subBufferSize * sizeof(cl_double)
+ };
+ test_info.tinfo[i].inBuf =
+ clCreateSubBuffer(gInBuffer, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+ test_info.tinfo[i].inBuf2 =
+ clCreateSubBuffer(gInBuffer2, CL_MEM_READ_ONLY,
+ CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
+ if (error || NULL == test_info.tinfo[i].inBuf2)
+ {
+ vlog_error("Error: Unable to create sub-buffer of gInBuffer2 for "
+ "region {%zd, %zd}\n",
+ region.origin, region.size);
+ goto exit;
+ }
+
+ for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
+ {
+ test_info.tinfo[i].outBuf[j] = clCreateSubBuffer(
+ gOutBuffer[j], CL_MEM_WRITE_ONLY, CL_BUFFER_CREATE_TYPE_REGION,
+ &region, &error);
+ if (error || NULL == test_info.tinfo[i].outBuf[j])
+ {
+ vlog_error("Error: Unable to create sub-buffer of "
+ "gOutBuffer[%d] for region {%zd, %zd}\n",
+ (int)j, region.origin, region.size);
+ goto exit;
+ }
+ }
+ test_info.tinfo[i].tQueue =
+ clCreateCommandQueue(gContext, gDevice, 0, &error);
+ if (NULL == test_info.tinfo[i].tQueue || error)
+ {
+ vlog_error("clCreateCommandQueue failed. (%d)\n", error);
+ goto exit;
+ }
+
+ test_info.tinfo[i].d = MTdataHolder(genrand_int32(d));
+ }
+
+ // Init the kernels
+ {
+ BuildKernelInfo build_info{ test_info.threadCount, test_info.k,
+ test_info.programs, f->nameInCode,
+ relaxedMode };
+ if ((error = ThreadPool_Do(BuildKernelFn,
+ gMaxVectorSizeIndex - gMinVectorSizeIndex,
+ &build_info)))
+ goto exit;
+ }
+
+ // Run the kernels
+ if (!gSkipCorrectnessTesting)
+ {
+ error = ThreadPool_Do(Test, test_info.jobCount, &test_info);
+
+ if (error) goto exit;
+
+ if (gWimpyMode)
+ vlog("Wimp pass");
+ else
+ vlog("passed");
+ }
+
+ vlog("\n");
+
+exit:
+ // Release
+ for (auto i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
+ {
+ for (auto &kernel : test_info.k[i])
+ {
+ clReleaseKernel(kernel);
+ }
+ }
+
+ return error;
+}