fmax.

Matt Arsenault Tue, 06 Dec 2016 17:10:14 -0800

> On Dec 5, 2016, at 13:35, Jan Vesely <[email protected]> wrote:
> 
> On Mon, 2016-12-05 at 09:48 -0800, [email protected] 
> <mailto:[email protected]> wrote:
>> From: Matt Arsenault <[email protected]>
>> 
>> These do not use the normal simple format because the number
>> of combinations that need to be tested is simply too large,
>> especially when tests for min3/max3 are added.
>> 
>> The unordered compare tests could be improved. Currently they truly
>> test the unordered compare because of LLVM bug 21610, but
>> ideally that would be fixed.
>> ---
>> tests/cl/program/CMakeLists.cl.txt                 |   1 +
>> .../cl/program/execute/scalar-comparison-float.cl  | 105 +++++
>> tests/cl/program/float-min-max-kernels.cl          | 492 
>> +++++++++++++++++++++
>> tests/cl/program/float-min-max.cpp                 | 475 ++++++++++++++++++++
>> 4 files changed, 1073 insertions(+)
>> create mode 100644 tests/cl/program/float-min-max-kernels.cl
>> create mode 100644 tests/cl/program/float-min-max.cpp
>> 
>> diff --git a/tests/cl/program/CMakeLists.cl.txt 
>> b/tests/cl/program/CMakeLists.cl.txt
>> index c8d7307..5ef0f6b 100644
>> --- a/tests/cl/program/CMakeLists.cl.txt
>> +++ b/tests/cl/program/CMakeLists.cl.txt
>> @@ -2,3 +2,4 @@ piglit_cl_add_program_test (tester program-tester.c)
>> piglit_cl_add_program_test (max-work-item-sizes max-work-item-sizes.c)
>> piglit_cl_add_program_test (bitcoin-phatk bitcoin-phatk.c)
>> piglit_cl_add_program_test (predefined-macros predefined-macros.c)
>> +piglit_cl_add_program_test (float-min-max float-min-max.cpp)
>> diff --git a/tests/cl/program/execute/scalar-comparison-float.cl 
>> b/tests/cl/program/execute/scalar-comparison-float.cl
>> index 4891fc5..598fae0 100644
>> --- a/tests/cl/program/execute/scalar-comparison-float.cl
>> +++ b/tests/cl/program/execute/scalar-comparison-float.cl
>> @@ -148,6 +148,71 @@ arg_in:  1 float -3.5
>> arg_in:  2 float 4.5
>> arg_out: 0 buffer int[1] 1
>> 
>> +
>> +[test]
>> +name: select_max_gt
>> +kernel_name: select_max_gt
>> +global_size: 24 0 0
>> +
>> +arg_out: 0 buffer float[24]    \
>> +  0.0  1.0  2.0  2.0  0.0  0.0 \
>> +  NAN  NAN  1.0  NAN -1.0  NAN \
>> +  0.0  0.0 97.0  INF  INF  INF \
>> +  NAN  NAN  INF  NAN -INF  INF
>> +
>> +arg_in: 1 buffer float[24]     \
>> +  0.0  1.0  1.0  2.0  0.0 -1.0 \
>> +  NAN  1.0  NAN -1.0  NAN  0.0 \
>> +  0.0 -0.0 37.0  INF  INF -INF \
>> + -INF  INF  NAN -INF  NAN  0.0
>> +
>> +arg_in: 2 buffer float[24]     \
>> +  0.0  1.0  2.0  1.0 -1.0  0.0 \
>> +  NAN  NAN  1.0  NAN -1.0  NAN \
>> + -0.0  0.0 97.0  INF -INF  INF \
>> + -INF  NAN  INF  NAN -INF  INF
>> +
>> +[test]
>> +name: select_max_gte
>> +kernel_name: select_max_gte
>> +global_size: 15 0 0
>> +
>> +arg_out: 0 buffer float[15]    \
>> +  0.0  1.0  2.0  2.0  0.0  0.0 \
>> +  NAN  NAN  1.0  NAN -1.0  NAN \
>> +  0.0  0.0 97.0
>> +
>> +arg_in: 1 buffer float[15]     \
>> +  0.0  1.0  1.0  2.0  0.0 -1.0 \
>> +  NAN  1.0  NAN -1.0  NAN  0.0 \
>> +  0.0 -0.0 37.0
>> +
>> +arg_in: 2 buffer float[15]     \
>> +  0.0  1.0  2.0  1.0 -1.0  0.0 \
>> +  NAN  NAN  1.0  NAN -1.0  NAN \
>> + -0.0  0.0 97.0
>> +
>> +[test]
>> +name: select_min_gt
>> +kernel_name: select_min_gt
>> +global_size: 15 0 0
>> +
>> +arg_out: 0 buffer float[15]    \
>> +  0.0  1.0  1.0  1.0 -1.0 -1.0 \
>> +  NAN  NAN  NAN  NAN NAN  NAN  \
>> +  0.0  0.0 37.0
>> +
>> +arg_in: 1 buffer float[15] \
>> +  0.0  1.0  1.0  2.0  0.0 -1.0 \
>> +  NAN  1.0  NAN -1.0  NAN  0.0 \
>> +  0.0 -0.0  37.0
>> +
>> +arg_in: 2 buffer float[15]     \
>> +  0.0  1.0  2.0  1.0 -1.0  0.0 \
>> +  NAN  NAN  1.0  NAN -1.0  NAN \
>> + -0.0  0.0 97.0
>> +
>> +
>> !*/
>> 
>> kernel void eq(global int* out, float a, float b) {
>> @@ -173,3 +238,43 @@ kernel void lt(global int* out, float a, float b) {
>> kernel void lte(global int* out, float a, float b) {
>>      out[0] = a <= b;
>> }
>> +
>> +kernel void select_max_gt(global float* restrict out, global float* 
>> restrict a, global float* restrict b) {
>> +    int id = get_global_id(0);
>> +    out[id] = (a[id] > b[id]) ? a[id] : b[id];
>> +}
>> +
>> +kernel void select_max_gte(global float* restrict out, global float* 
>> restrict a, global float* restrict b) {
>> +    int id = get_global_id(0);
>> +    out[id] = (a[id] >= b[id]) ? a[id] : b[id];
>> +}
>> +
>> +kernel void select_min_gt(global float* restrict out, global float* 
>> restrict a, global float* restrict b) {
>> +    int id = get_global_id(0);
>> +    out[id] = (a[id] > b[id]) ? b[id] : a[id];
>> +}
>> +
>> +kernel void select_min_gte(global float* restrict out, global float* 
>> restrict a, global float* restrict b) {
>> +    int id = get_global_id(0);
>> +    out[id] = (a[id] >= b[id]) ? b[id] : a[id];
>> +}
>> +
>> +kernel void select_min_lt(global float* restrict out, global float* 
>> restrict a, global float* restrict b) {
>> +    int id = get_global_id(0);
>> +    out[id] = (a[id] < b[id]) ? a[id] : b[id];
>> +}
>> +
>> +kernel void select_max_lt(global float* restrict out, global float* 
>> restrict a, global float* restrict b) {
>> +    int id = get_global_id(0);
>> +    out[id] = (a[id] < b[id]) ? b[id] : a[id];
>> +}
>> +
>> +kernel void select_min_lte(global float* restrict out, global float* 
>> restrict a, global float* restrict b) {
>> +    int id = get_global_id(0);
>> +    out[id] = (a[id] <= b[id]) ? a[id] : b[id];
>> +}
>> +
>> +kernel void select_max_lte(global float* restrict out, global float* 
>> restrict a, global float* restrict b) {
>> +    int id = get_global_id(0);
>> +    out[id] = (a[id] <= b[id]) ? b[id] : a[id];
>> +}
>> diff --git a/tests/cl/program/float-min-max-kernels.cl 
>> b/tests/cl/program/float-min-max-kernels.cl
>> new file mode 100644
>> index 0000000..09f31d4
>> --- /dev/null
>> +++ b/tests/cl/program/float-min-max-kernels.cl
>> @@ -0,0 +1,492 @@
>> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
>> +
>> +kernel void select_max_gt_f32(global float* restrict out,
>> +                              constant float* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    out[n * idx + idy] = (a > b) ? a : b;
>> +}
>> +
>> +kernel void select_max_ge_f32(global float* restrict out,
>> +                              constant float* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    out[n * idx + idy] = (a >= b) ? a : b;
>> +}
>> +
>> +kernel void select_min_gt_f32(global float* restrict out,
>> +                              constant float* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    out[n * idx + idy] = (a > b) ? b : a;
>> +}
>> +
>> +kernel void select_min_ge_f32(global float* restrict out,
>> +                              constant float* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    out[n * idx + idy] = (a >= b) ? b : a;
>> +}
>> +
>> +kernel void select_max_lt_f32(global float* restrict out,
>> +                              constant float* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    out[n * idx + idy] = (a < b) ? b : a;
>> +}
>> +
>> +kernel void select_max_le_f32(global float* restrict out,
>> +                              constant float* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    out[n * idx + idy] = (a <= b) ? b : a;
>> +}
>> +
>> +kernel void select_min_lt_f32(global float* restrict out,
>> +                              constant float* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    out[n * idx + idy] = (a < b) ? a : b;
>> +}
>> +
>> +kernel void select_min_le_f32(global float* restrict out,
>> +                              constant float* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    out[n * idx + idy] = (a <= b) ? a : b;
>> +}
>> +
>> +kernel void test_fmin_f32(global float* restrict out,
>> +                          constant float* in,
>> +                          int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    out[n * idx + idy] = fmin(a, b);
>> +}
>> +
>> +kernel void test_fmax_f32(global float* restrict out,
>> +                          constant float* in,
>> +                          int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    out[n * idx + idy] = fmax(a, b);
>> +}
>> +
>> +// FIXME: It is a canonicalization bug that an unordered comparison is
>> +// emitted for this if the intermediate cmp variable is used.
>> +kernel void select_max_ugt_f32(global float* restrict out,
>> +                               constant float* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    bool cmp = !(a <= b);
>> +    out[n * idx + idy] = cmp ? a : b;
>> +}
>> +
>> +kernel void select_max_uge_f32(global float* restrict out,
>> +                               constant float* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    bool cmp = !(a < b);
>> +    out[n * idx + idy] = cmp ? a : b;
>> +}
>> +
>> +kernel void select_min_ugt_f32(global float* restrict out,
>> +                               constant float* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    bool cmp = !(a <= b);
>> +    out[n * idx + idy] = cmp ? b : a;
>> +}
>> +
>> +kernel void select_min_uge_f32(global float* restrict out,
>> +                               constant float* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    bool cmp = !(a < b);
>> +    out[n * idx + idy] = cmp ? b : a;
>> +}
>> +
>> +kernel void select_max_ult_f32(global float* restrict out,
>> +                               constant float* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    bool cmp = !(a >= b);
>> +    out[n * idx + idy] = cmp ? b : a;
>> +}
>> +
>> +kernel void select_max_ule_f32(global float* restrict out,
>> +                               constant float* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    bool cmp = !(a > b);
>> +    out[n * idx + idy] = cmp ? b : a;
>> +}
>> +
>> +kernel void select_min_ult_f32(global float* restrict out,
>> +                               constant float* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    bool cmp = !(a >= b);
>> +    out[n * idx + idy] = cmp ? a : b;
>> +}
>> +
>> +kernel void select_min_ule_f32(global float* restrict out,
>> +                               constant float* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    float a = in[idx];
>> +    float b = in[idy];
>> +
>> +    bool cmp = !(a > b);
>> +    out[n * idx + idy] = cmp ? a : b;
>> +}
>> +
>> +#if cl_khr_fp64
>> +kernel void select_max_gt_f64(global double* restrict out,
>> +                              constant double* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    out[n * idx + idy] = (a > b) ? a : b;
>> +}
>> +
>> +kernel void select_max_ge_f64(global double* restrict out,
>> +                              constant double* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    out[n * idx + idy] = (a >= b) ? a : b;
>> +}
>> +
>> +kernel void select_min_gt_f64(global double* restrict out,
>> +                              constant double* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    out[n * idx + idy] = (a > b) ? b : a;
>> +}
>> +
>> +kernel void select_min_ge_f64(global double* restrict out,
>> +                              constant double* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    out[n * idx + idy] = (a >= b) ? b : a;
>> +}
>> +
>> +kernel void select_max_lt_f64(global double* restrict out,
>> +                              constant double* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    out[n * idx + idy] = (a < b) ? b : a;
>> +}
>> +
>> +kernel void select_max_le_f64(global double* restrict out,
>> +                              constant double* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    out[n * idx + idy] = (a <= b) ? b : a;
>> +}
>> +
>> +kernel void select_min_lt_f64(global double* restrict out,
>> +                              constant double* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    out[n * idx + idy] = (a < b) ? a : b;
>> +}
>> +
>> +kernel void select_min_le_f64(global double* restrict out,
>> +                              constant double* restrict in,
>> +                              int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    out[n * idx + idy] = (a <= b) ? a : b;
>> +}
>> +
>> +kernel void test_fmin_f64(global double* restrict out,
>> +                          constant double* in,
>> +                          int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    out[n * idx + idy] = fmin(a, b);
>> +}
>> +
>> +kernel void test_fmax_f64(global double* restrict out,
>> +                          constant double* in,
>> +                          int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    out[n * idx + idy] = fmax(a, b);
>> +}
>> +
>> +// FIXME: It is a canonicalization bug that an unordered comparison is
>> +// emitted for this if the intermediate cmp variable is used.
>> +kernel void select_max_ugt_f64(global double* restrict out,
>> +                               constant double* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    bool cmp = !(a <= b);
>> +    out[n * idx + idy] = cmp ? a : b;
>> +}
>> +
>> +kernel void select_max_uge_f64(global double* restrict out,
>> +                               constant double* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    bool cmp = !(a < b);
>> +    out[n * idx + idy] = cmp ? a : b;
>> +}
>> +
>> +kernel void select_min_ugt_f64(global double* restrict out,
>> +                               constant double* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    bool cmp = !(a <= b);
>> +    out[n * idx + idy] = cmp ? b : a;
>> +}
>> +
>> +kernel void select_min_uge_f64(global double* restrict out,
>> +                               constant double* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    bool cmp = !(a < b);
>> +    out[n * idx + idy] = cmp ? b : a;
>> +}
>> +
>> +kernel void select_max_ult_f64(global double* restrict out,
>> +                               constant double* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    bool cmp = !(a >= b);
>> +    out[n * idx + idy] = cmp ? b : a;
>> +}
>> +
>> +kernel void select_max_ule_f64(global double* restrict out,
>> +                               constant double* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    bool cmp = !(a > b);
>> +    out[n * idx + idy] = cmp ? b : a;
>> +}
>> +
>> +kernel void select_min_ult_f64(global double* restrict out,
>> +                               constant double* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    bool cmp = !(a >= b);
>> +    out[n * idx + idy] = cmp ? a : b;
>> +}
>> +
>> +kernel void select_min_ule_f64(global double* restrict out,
>> +                               constant double* restrict in,
>> +                               int n)
>> +{
>> +    int idx = get_global_id(0);
>> +    int idy = get_global_id(1);
>> +
>> +    double a = in[idx];
>> +    double b = in[idy];
>> +
>> +    bool cmp = !(a > b);
>> +    out[n * idx + idy] = cmp ? a : b;
>> +}
>> +
>> +#endif
>> diff --git a/tests/cl/program/float-min-max.cpp 
>> b/tests/cl/program/float-min-max.cpp
>> new file mode 100644
>> index 0000000..296b446
>> --- /dev/null
>> +++ b/tests/cl/program/float-min-max.cpp
>> @@ -0,0 +1,475 @@
>> +
>> +extern "C" {
>> +#include "piglit-framework-cl-program.h"
>> +}
>> +
>> +PIGLIT_CL_PROGRAM_TEST_CONFIG_BEGIN
>> +
>> +    config.name = "Run kernels which will use select min / max 
>> instructions";
>> +
>> +    config.run_per_device = true;
>> +
>> +    config.program_source_file = "float-min-max-kernels.cl";
>> +    config.kernel_name = NULL; // We have many kernels.
>> +
>> +PIGLIT_CL_PROGRAM_TEST_CONFIG_END
>> +
>> +
>> +template <typename Real>
>> +struct TestFunction
>> +{
>> +    typedef Real (*MinMaxFunc)(Real, Real);
>> +
>> +    const char* kernel_name;
>> +    MinMaxFunc ref_func;
>> +};
>> +
>> +static const size_t n_cases = 32;
>> +static const size_t n_denormal_cases = 8;
>> +static const size_t n_tests = 18;
>> +
>> +template <typename Real>
>> +class FMinFMaxTest
>> +{
>> +public:
>> +    typedef typename TestFunction<Real>::MinMaxFunc MinMaxFunc;
>> +
>> +    static const Real cases[n_cases];
>> +    static const TestFunction<Real> test_minmax_fns[n_tests];
>> +
>> +
>> +    FMinFMaxTest() { }
>> +    ~FMinFMaxTest() { }
>> +
>> +    static cl_mem create_input_buffer(const piglit_cl_program_test_env* env,
>> +                                      bool denormals);
>> +
>> +    static bool verify_results(MinMaxFunc func,
>> +                               const Real* results,
>> +                               bool test_denormals);
>> +    static bool run_minmax_test(const piglit_cl_program_test_env* env,
>> +                                const TestFunction<Real>* test_fn,
>> +                                cl_mem input,
>> +                                bool test_denormals);
>> +
>> +    static piglit_result run_tests(const piglit_cl_program_test_env* env,
>> +                                   cl_device_fp_config fp_config);
>> +
>> +};
>> +
>> +template <typename Real>
>> +static Real select_max_gt(Real a, Real b)
>> +{
>> +    return (a > b) ? a : b;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_max_ge(Real a, Real b)
>> +{
>> +    return (a >= b) ? a : b;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_min_gt(Real a, Real b)
>> +{
>> +    return (a > b) ? b : a;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_min_ge(Real a, Real b)
>> +{
>> +    return (a >= b) ? b : a;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_max_lt(Real a, Real b)
>> +{
>> +    return (a < b) ? b : a;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_max_le(Real a, Real b)
>> +{
>> +    return (a <= b) ? b : a;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_min_lt(Real a, Real b)
>> +{
>> +    return (a < b) ? a : b;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_min_le(Real a, Real b)
>> +{
>> +    return (a <= b) ? a : b;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_max_ugt(Real a, Real b)
>> +{
>> +    return !(a <= b) ? a : b;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_max_uge(Real a, Real b)
>> +{
>> +    return !(a < b) ? a : b;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_min_ugt(Real a, Real b)
>> +{
>> +    return !(a <= b) ? b : a;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_min_uge(Real a, Real b)
>> +{
>> +    return !(a < b) ? b : a;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_max_ult(Real a, Real b)
>> +{
>> +    return !(a >= b) ? b : a;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_max_ule(Real a, Real b)
>> +{
>> +    return !(a > b) ? b : a;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_min_ult(Real a, Real b)
>> +{
>> +    return !(a >= b) ? a : b;
>> +}
>> +
>> +template <typename Real>
>> +static Real select_min_ule(Real a, Real b)
>> +{
>> +    return !(a > b) ? a : b;
>> +}
>> +
>> +#define TYPE_SUFFIX "_f32"
>> +#define TYPE_NAME float
>> +#define TEST_FN(name) { #name TYPE_SUFFIX, name<TYPE_NAME> }
>> +
>> +template <>
>> +const TestFunction<float> FMinFMaxTest<float>::test_minmax_fns[n_tests] = {
>> +    TEST_FN(select_max_gt),
>> +    TEST_FN(select_max_ge),
>> +    TEST_FN(select_min_gt),
>> +    TEST_FN(select_min_ge),
>> +
>> +    TEST_FN(select_max_lt),
>> +    TEST_FN(select_max_le),
>> +    TEST_FN(select_min_lt),
>> +    TEST_FN(select_min_le),
>> +
>> +    TEST_FN(select_max_ugt),
>> +    TEST_FN(select_max_uge),
>> +    TEST_FN(select_min_ugt),
>> +    TEST_FN(select_min_uge),
>> +
>> +    TEST_FN(select_max_ult),
>> +    TEST_FN(select_max_ule),
>> +    TEST_FN(select_min_ult),
>> +    TEST_FN(select_min_ule),
>> +
>> +    { "test_fmin_f32", fminf },
>> +    { "test_fmax_f32", fmaxf }
>> +};
>> +
>> +#undef TYPE_SUFFIX
>> +#undef TYPE_NAME
>> +#define TYPE_SUFFIX "_f64"
>> +#define TYPE_NAME double
>> +
>> +template <>
>> +const TestFunction<double> FMinFMaxTest<double>::test_minmax_fns[n_tests] = 
>> {
>> +    TEST_FN(select_max_gt),
>> +    TEST_FN(select_max_ge),
>> +    TEST_FN(select_min_gt),
>> +    TEST_FN(select_min_ge),
>> +
>> +    TEST_FN(select_max_lt),
>> +    TEST_FN(select_max_le),
>> +    TEST_FN(select_min_lt),
>> +    TEST_FN(select_min_le),
>> +
>> +    TEST_FN(select_max_ugt),
>> +    TEST_FN(select_max_uge),
>> +    TEST_FN(select_min_ugt),
>> +    TEST_FN(select_min_uge),
>> +
>> +    TEST_FN(select_max_ult),
>> +    TEST_FN(select_max_ule),
>> +    TEST_FN(select_min_ult),
>> +    TEST_FN(select_min_ule),
>> +
>> +    { "test_fmin_f64", fmin },
>> +    { "test_fmax_f64", fmax }
>> +};
>> +
>> +#undef TYPE_SUFFIX
>> +#undef TYPE_NAME
>> +
>> +template <>
>> +const float FMinFMaxTest<float>::cases[n_cases] = {
>> +    0.0f,
>> +    -0.0f,
>> +
>> +    0.5f,
>> +    -0.5f,
>> +
>> +    -1.0f,
>> +    1.0f,
>> +
>> +    -2.0f,
>> +    2.0f,
>> +
>> +    3.0f,
>> +    -3.0f,
>> +
>> +    4.0f,
>> +    -4.0f,
>> +
>> +    12345.0,
>> +
>> +    0x1p-126f,         // Minimum normal number
>> +    -0x1p-126f,
>> +
>> +    0x1p-126f,         // Min float
>> +    -0x1p-126f,
>> +
>> +    0x1.fffffep+127f,  // Max float
>> +    -0x1.fffffep+127f,
>> +
>> +    0x1p-23f,          // Epsilon
>> +    -0x1p-23f,
>> +
>> +    INFINITY,
>> +    -INFINITY,
>> +    NAN,
>> +
>> +    // Denormals.
>> +    0x1p-149f,         // Denorm min
>> +    -0x1p-149f,
>> +
>> +    0x1p-148f,         // Denorm min * 2.0
>> +    -0x1p-148f,
>> +
>> +    0x1.fffffcp-127f,  // Max denormal
>> +    -0x1.fffffcp-127f,
>> +
>> +    0x1.fffffcp-128f,  // Max denormal / 2.0
>> +    -0x1.fffffcp-128f
>> +};
>> +
>> +template <>
>> +const double FMinFMaxTest<double>::cases[n_cases] = {
>> +    0.0,
>> +    -0.0,
>> +
>> +    0.5,
>> +    -0.5,
>> +
>> +    -1.0,
>> +    1.0,
>> +
>> +    -2.0,
>> +    2.0,
>> +
>> +    3.0,
>> +    -3.0,
>> +
>> +    4.0,
>> +    -4.0,
>> +
>> +    12345.0,
>> +
>> +    0x1.fffffffffffffp+1023, // Maximum double
>> +    -0x1.fffffffffffffp+1023,
>> +
>> +    0x1p-1022, // Minimum normal number
>> +    -0x1p-1022,
>> +
>> +    0x1p-52, // Epsilon
>> +    -0x1p-52,
>> +
>> +    INFINITY,
>> +    -INFINITY,
>> +    NAN,
>> +
>> +    // Denormals.
>> +    0x0.0000000000001p-1022, // Denorm min
>> +    -0x0.0000000000001p-1022,
>> +
>> +    0x0.0000000000002p-1022, // Denorm min * 2.0
>> +    -0x0.0000000000002p-1022,
>> +
>> +    0x0.fffffffffffffp-1022, // Max denormal
>> +    -0x0.fffffffffffffp-1022,
>> +
>> +    0x0.8p-1022, // Max denormal / 2.0
>> +    -0x0.8p-1022
>> +};
>> +
>> +
>> +
>> +template <typename Real>
>> +cl_mem FMinFMaxTest<Real>::create_input_buffer(const 
>> piglit_cl_program_test_env* env,
>> +                                               bool denormals)
>> +{
>> +    const size_t n = denormals ? n_cases : (n_cases - n_denormal_cases);
>> +    cl_mem buf = piglit_cl_create_buffer(env->context, CL_MEM_READ_ONLY, n 
>> * n * sizeof(Real));
>> +    if (!buf)
>> +        return NULL;
>> +
>> +    for (size_t i = 0; i < n; ++i)
>> +    {
>> +        if (!piglit_cl_write_buffer(env->context->command_queues[0], buf, i 
>> * n * sizeof(Real), n * sizeof(Real), cases))
>> +        {
>> +            // Leaking buf
>> +            return NULL;
>> +        }
>> +    }
>> +
>> +    return buf;
>> +}
>> +
>> +template <typename Real>
>> +bool FMinFMaxTest<Real>::verify_results(MinMaxFunc func,
>> +                                        const Real* results,
>> +                                        bool test_denormals)
>> +{
>> +    bool failed = false;
>> +
>> +    const size_t n = test_denormals ? n_cases : (n_cases - 
>> n_denormal_cases);
>> +
>> +    for (size_t i = 0; i < n; ++i)
>> +    {
>> +        for (size_t j = 0; j < n; ++j)
>> +        {
>> +            Real x = cases[i];
>> +            Real y = cases[j];
>> +
>> +            Real ref = func(x, y);
>> +            Real result = results[n * i + j];
>> +
>> +            failed |= !piglit_cl_probe_floating(result, ref, 0);
>> +        }
>> +    }
>> +
>> +    return failed;
>> +}
>> +
>> +template <typename Real>
>> +bool FMinFMaxTest<Real>::run_minmax_test(const piglit_cl_program_test_env* 
>> env,
>> +                                         const TestFunction<Real>* test_fn,
>> +                                         cl_mem input,
>> +                                         bool test_denormals)
>> +{
>> +    const size_t n = test_denormals ? n_cases : (n_cases - 
>> n_denormal_cases);
>> +    const cl_int n_i = (cl_int) n;
>> +    const size_t global_size[2] = { n, n };
>> +    printf("Create kernel '%s'\n", test_fn->kernel_name);
>> +    cl_kernel kernel = piglit_cl_create_kernel(env->program,
>> +                                               test_fn->kernel_name);
>> +    if (!kernel)
>> +    {
>> +        return true;
>> +    }
>> +
>> +    Real* ptr_out = new Real[n * n]();
>> +    if (!ptr_out)
>> +    {
>> +        return true;
>> +    }
>> +
>> +    cl_mem mem_out = piglit_cl_create_buffer(env->context, 
>> CL_MEM_WRITE_ONLY,
>> +                                             n * n * sizeof(Real));
>> +    if (!mem_out)
>> +    {
>> +        delete[] ptr_out;
>> +        return true;
>> +    }
>> +
>> +    piglit_cl_set_kernel_buffer_arg(kernel, 0, &mem_out);
>> +    piglit_cl_set_kernel_buffer_arg(kernel, 1, &input);
>> +    piglit_cl_set_kernel_arg(kernel, 2, sizeof(cl_int), &n_i);
>> +
>> +    piglit_cl_execute_ND_range_kernel(env->context->command_queues[0],
>> +                                      kernel,
>> +                                      2,
>> +                                      NULL,
>> +                                      global_size,
>> +                                      NULL);
>> +
>> +    bool failed = !piglit_cl_read_buffer(env->context->command_queues[0], 
>> mem_out, 0,
>> +                                         n * n * sizeof(Real), ptr_out);
>> +    if (!failed)
>> +        failed = verify_results(test_fn->ref_func, ptr_out, test_denormals);
>> +
>> +    delete[] ptr_out;
>> +
>> +    // Leaking mem_out
>> +    return failed;
>> +}
>> +
>> +template <typename Real>
>> +piglit_result FMinFMaxTest<Real>::run_tests(const 
>> piglit_cl_program_test_env* env,
>> +                                            cl_device_fp_config fp_config)
>> +{
>> +    bool failed = false;
>> +
>> +    bool test_denormals = (fp_config & CL_FP_DENORM) != 0;
>> +
>> +    cl_mem input = create_input_buffer(env, test_denormals);
>> +    if (!input)
>> +        return PIGLIT_FAIL;
>> +
>> +    for (size_t i = 0; i < n_tests; ++i)
>> +    {
>> +        if (run_minmax_test(env, &test_minmax_fns[i], input, 
>> test_denormals))
>> +        {
>> +            fprintf(stderr, "Failure testing kernel %s\n", 
>> test_minmax_fns[i].kernel_name);
>> +            return PIGLIT_FAIL;
>> +        }
>> +    }
>> +
>> +    return failed ? PIGLIT_FAIL : PIGLIT_PASS;
>> +}
>> +
>> +piglit_result
>> +piglit_cl_test(const int argc,
>> +               const char** argv,
>> +               const piglit_cl_program_test_config* config,
>> +               const piglit_cl_program_test_env* env)
>> +{
>> +    cl_device_fp_config float_fp_config = 0;
>> +    cl_device_fp_config double_fp_config = 0;
>> +
>> +    clGetDeviceInfo(env->device_id, CL_DEVICE_SINGLE_FP_CONFIG,
>> +                    sizeof(cl_device_fp_config), &float_fp_config, NULL);
>> +
>> +    clGetDeviceInfo(env->device_id, CL_DEVICE_DOUBLE_FP_CONFIG,
>> +                    sizeof(cl_device_fp_config), &double_fp_config, NULL);
>> +
>> +    piglit_result result = FMinFMaxTest<float>().run_tests(env,
>> +                                                           float_fp_config);
>> +    if (result != PIGLIT_PASS)
>> +        return result;
>> +
>> +    if (double_fp_config != 0) {
>> +        piglit_result result = FMinFMaxTest<double>().run_tests(env,
>> +                                                                
>> double_fp_config);
>> +        if (result != PIGLIT_PASS)
>> +            return result;
>> +    }
>> +
>> +    return PIGLIT_PASS;
>> +}
> 
> why are these tests created at runtime instead of build time? the c++
> code would be better as python generator.
> 
> Jan



The number of combinations started growing, and repeating the same list of 
inputs for the tests and manually adding the output results grew tiresome. I 
didn’t see other precedent for other tests to generate the test outputs. 

-Matt

_______________________________________________
Piglit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/piglit

Re: [Piglit] [PATCH 03/10] cl: Add tests for different versions of fmin / fmax.

Reply via email to