The bigfloat package is a necessary dependency for this filter, as it provides both arbitrary-size floating point operations and it allows all floating point values and operations in a code block to be forced into a particular precision, leaving no question as to what precision was employed for an intermediate calculation. This comes in handy when running the reference versions of the complex built-ins. Performance impact is small (~6s for the entire gen_shader_precision_tests.py script on IvyBridge i7 @ 2.9GHz) and is localized to build-time. --- CMakeLists.txt | 1 + cmake/Modules/FindPythonBigfloat.cmake | 43 ++ generated_tests/gen_shader_precision_tests.py | 603 +++++++++++++++++++++++++- 3 files changed, 633 insertions(+), 14 deletions(-) create mode 100644 cmake/Modules/FindPythonBigfloat.cmake
diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a81eed..b13acdb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -193,6 +193,7 @@ find_package(PythonInterp 2.7 REQUIRED) find_package(PythonNumpy 1.6.2 REQUIRED) find_package(PythonMako 0.7.3 REQUIRED) find_package(PythonSix REQUIRED) +find_package(PythonBigfloat 3.1.0 REQUIRED) # Default to compiling with debug information (`gcc -g`): if(NOT CMAKE_BUILD_TYPE) diff --git a/cmake/Modules/FindPythonBigfloat.cmake b/cmake/Modules/FindPythonBigfloat.cmake new file mode 100644 index 0000000..0b65b55 --- /dev/null +++ b/cmake/Modules/FindPythonBigfloat.cmake @@ -0,0 +1,43 @@ +# Copyright (C) 2015 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining +# a copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE +# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +# Find bigfloat +execute_process( + COMMAND ${PYTHON_EXECUTABLE} -c "import sys, bigfloat; sys.stdout.write(bigfloat.MPFR_VERSION_STRING)" + OUTPUT_VARIABLE _version + RESULT_VARIABLE _status + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +set(PythonBigfloat_VERSION_STRING ${_version}) + +# A status returns 0 if everything is okay. And zero is false. To make +# checking in the outer scope less surprising +if (_status EQUAL 0) + set("PythonBigfloat_STATUS" "success") +endif (_status EQUAL 0) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + "PythonBigfloat" + REQUIRED_VARS "PythonBigfloat_STATUS" + VERSION_VAR "PythonBigfloat_VERSION_STRING" +) diff --git a/generated_tests/gen_shader_precision_tests.py b/generated_tests/gen_shader_precision_tests.py index 6c011a3..3195f89 100644 --- a/generated_tests/gen_shader_precision_tests.py +++ b/generated_tests/gen_shader_precision_tests.py @@ -50,31 +50,606 @@ from __future__ import print_function, division, absolute_import from builtin_function import * import os import numpy +import struct +import bigfloat import six from six.moves import range from templates import template_file -tolerances = {'pow': 16.0, - 'exp': 3.0, - 'exp2': 3.0, - 'log': 3.0, - 'log2': 3.0, - 'sqrt': 3.0, - 'inversesqrt': 2.0, - 'op-div': 2.5, - 'op-assign-div': 2.5, - } + +allowed_error_scale = 4.0 trig_builtins = ('sin', 'cos', 'tan', 'asin', 'acos', 'atan', 'sinh', 'cosh', 'tanh', 'asinh', 'acosh', 'atanh') +high_precision = bigfloat.quadruple_precision +low_precision = bigfloat.single_precision + def _is_sequence(arg): return isinstance(arg, (collections.Sequence, numpy.ndarray)) +def _ulpsize(f): + """ determine _ulpsize in the direction of nearest infinity + which gives the worst case scenario for edge cases + """ + if f >= 0: + return bigfloat.next_up(f) - f + else: + return f - bigfloat.next_down(f) + +def _listify(arg): + """ if arg is a scalar, convert it to an array + if it's an array, return it as-is + """ + return arg if _is_sequence(arg) else [arg] + +def _to_bigfloat(arg): + if _is_sequence(arg): + for ar in arg: + assert(isinstance(ar, numpy.float32) or isinstance(ar, bigfloat.BigFloat)) + + else: + assert(isinstance(arg, numpy.float32) or isinstance(arg, bigfloat.BigFloat)) + + if isinstance(arg, bigfloat.BigFloat) or \ + (_is_sequence(arg) and isinstance(arg[0], bigfloat.BigFloat)): + return arg + else: + return list(bigfloat.BigFloat('%.8e' % x) for x in arg) + +def _mod_ref(args): + """Modulus reference function. + Returns x floor (x/y). + + This is a scalar implementation of a vector-componentwise GLSL built-in - + arguments must be in scalar form (float). Vector arguments must be + partitioned by the caller. + Arguments are converted to BigFloat type. + Return value is a list of a single BigFloat value. + + arguments: + args -- list of all arguments + args[0] -- x + args[1] -- y + """ + bfargs = _to_bigfloat(args) + x = bfargs[0] + y = bfargs[1] + return [x - y * (bigfloat.floor(x / y))] + +def _smoothstep_ref(args): + """Smoothstep reference function. + Returns 0.0 if x <= edge0 and 1.0 if x >= edge1 and + performs smooth Hermite interpolation between 0 and 1 + when edge0 < x < edge1. This is useful in cases where + you would want a threshold function with a smooth + transition. This is equivalent to: + t = clamp ((x edge0), 0, 1); + return t * t * (3 a) + y * a + + This is a scalar implementation of a vector-componentwise GLSL built-in - + arguments must be in scalar form (float). Vector arguments must be + partitioned by the caller. + Arguments are converted to BigFloat type. + Return value is a list of a single BigFloat value. + + arguments: + args -- list of all arguments + args[0] -- x + args[1] -- y + args[2] -- a + """ + bfargs = _to_bigfloat(args) + x = bfargs[0] + y = bfargs[1] + a = bfargs[2] + return [x * (bigfloat.BigFloat(1.0) - a) + y * a] + +def _length_ref(args): + """Length reference function. + Returns the length of vector x, i.e., + sqrt(x[0]^2 + x[1]^2 + ...) + + This is a non-componentwise GLSL built-in - + arguments may be scalar (float) or vector form (list). + Arguments are converted to BigFloat type. + Return value is a list of a single BigFloat value. + + arguments: + args -- list of all arguments + args[0] -- x + """ + component_sum = bigfloat.BigFloat(0.0) + x = _to_bigfloat(args[0]) + for arg in x: + component_sum += bigfloat.pow(arg, 2.0) + return [bigfloat.sqrt(component_sum)] + +def _distance_ref(args): + """Distance reference function. + Returns the distance between p0 and p1, i.e., + length (p0 y[1] * x[2] + x[2] * y[0] y[0] * x[1]] + + This is a non-componentwise GLSL built-in - + arguments are 3-element vectors. + Arguments are converted to BigFloat type. + Return value is a 3-element list of BigFloat values. + + arguments: + args -- list of all arguments + args[0] -- x + args[1] -- y + """ + x = _to_bigfloat(args[0]) + y = _to_bigfloat(args[1]) + ret = [x[1] * y[2] - y[1] * x[2], x[2] * y[0] - y[2] * x[0], x[0] * y[1] - y[0] * x[1]] + return ret + +def _normalize_ref(args): + """Normalize reference function. + Returns a vector in the same direction as x but with a + length of 1, ie. x/length(x). + + This is a non-componentwise GLSL built-in - + arguments may be scalar (float) or vector form (list). + Arguments are converted to BigFloat type. + Return value is a list of BigFloat values equal in count to the number of + items in x. + + arguments: + args -- list of all arguments + args[0] -- x + """ + bfargs = _to_bigfloat(args[0]) + normalized_components = [] + comp_len = _length_ref(args) + for component in bfargs: + normalized_components.append(component / comp_len[0]) + return normalized_components + +def _faceforward_ref(args): + """Faceforward reference function. + If dot(Nref, I) < 0 return N, otherwise return 2 N + N must already be normalized in order to achieve the + desired result. + + This is a non-componentwise GLSL built-in - + arguments may be scalar (float) or vector form (list). + Arguments are converted to BigFloat type. + Return value is a list of BigFloat values equal in count to the number of + items in I. + + arguments: + args -- list of all arguments + args[0] -- I + args[1] -- N + """ + I = _to_bigfloat(args[0]) + N = _to_bigfloat(args[1]) + dotNI = _dot_ref([N,I])[0] + ret = [] + for Ncomponent, Icomponent in zip(N,I): + ret.append(Icomponent - bigfloat.BigFloat(2.0) * dotNI * Ncomponent) + return ret + +def _refract_ref(args): + """Refract reference function. + For the incident vector I and surface normal N, and the + ratio of indices of refraction eta, return the refraction + vector. The result is computed by + k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) + if (k < 0.0) + return (0.0) + else + return eta * I - (eta * dot(N, I) + sqrt(k)) * N + The input parameters for the incident vector I and the + surface normal N must already be normalized to get the + desired results. + + This is a non-componentwise GLSL built-in - + arguments may be scalar (float) or vector form (list). + Arguments are converted to BigFloat type. + Return value is a list of BigFloat values equal in count to the number of + items in I. + + arguments: + args -- list of all arguments + args[0] -- I + args[1] -- N + args[2] -- eta + """ + I = _to_bigfloat(args[0]) + N = _to_bigfloat(args[1]) + eta = _to_bigfloat(args[2])[0] + ret = [] + k = bigfloat.BigFloat(1.0) - eta * eta * \ + (bigfloat.BigFloat(1.0) - _dot_ref([N,I])[0] * _dot_ref([N,I])[0]) + if k < 0.0: + for i in range(len(I)): + ret.append(bigfloat.BigFloat(0.0)) + else: + Ntemp = [] + Itemp = [] + for Ncomponent, Icomponent in zip(N,I): + Ntemp = (eta * _dot_ref([N,I])[0] + bigfloat.sqrt(k)) * Ncomponent + Itemp = eta * Icomponent + ret.append(Itemp - Ntemp) + return ret + +def _vec_times_mat_ref(args): + """Matrix Multiplication (Vector * Matrix) reference function. + vec3 v, u; + mat3 m; + u = v * m; + u.x = dot(v, m[0]); // m[0] is the left column of m + u.y = dot(v, m[1]); // dot(a,b) is the inner (dot) product of a and b + u.z = dot(v, m[2]); + + This is a non-componentwise GLSL built-in - + arguments are one vector (list) and one matrix (list (rows) of lists (cols)) + ex. arg[0] = [a, b, c]; arg[1] = [[d, e, f], [g, h, i], [j, k, l]]; m[0] == [d, g, j] + Arguments are converted to BigFloat type. + Return value is a list of BigFloat values equal in count to the number of + items in v. + + arguments: + args -- list of all arguments + args[0] -- v + args[1] -- m + """ + v = args[0] + m = args[1] + m_type = glsl_type_of(m) + num_cols = m_type.num_cols + num_rows = m_type.num_rows + components = num_cols + ret = [] + for i in range(components): + m_col = [] + for j in range(num_rows): + m_col.append(m[j][i]) + ret.append(_dot_ref([v,m_col])[0]) + return ret + +def _mat_times_vec_ref(args): + """Matrix Multiplication (Matrix * Vector) reference function. + u = m * v; + u.x = m[0].x * v.x + m[1].x * v.y + m[2].x * v.z; + u.y = m[0].y * v.x + m[1].y * v.y + m[2].y * v.z; + u.z = m[0].z * v.x + m[1].z * v.y + m[2].z * v.z; + + This is a non-componentwise GLSL built-in - + arguments are one matrix (list (rows) of lists (cols)) and one vector (list) + ex. arg[0] = [[d, e, f], [g, h, i], [j, k, l]]; m[0] == [d, g, j]; arg[1] = [a, b, c]; + Arguments are converted to BigFloat type. + Return value is a list of BigFloat values equal in count to the number of + items in v. + + arguments: + args -- list of all arguments + args[0] -- m + args[1] -- v + """ + m = args[0] + v = args[1] + m_type = glsl_type_of(m) + num_rows = m_type.num_rows + components = num_rows + ret = [] + for i in range(components): + m_col = m[i] + ret.append(_dot_ref([v,m_col])[0]) + return ret + +def _mat_times_mat_ref(args): + """Matrix Multiplication (Matrix * Matrix) reference function. + mat3 m, n, r; + r = m * n; + r[0].x = m[0].x * n[0].x + m[1].x * n[0].y + m[2].x * n[0].z; + r[1].x = m[0].x * n[1].x + m[1].x * n[1].y + m[2].x * n[1].z; + r[2].x = m[0].x * n[2].x + m[1].x * n[2].y + m[2].x * n[2].z; + + r[0].y = m[0].y * n[0].x + m[1].y * n[0].y + m[2].y * n[0].z; + r[1].y = m[0].y * n[1].x + m[1].y * n[1].y + m[2].y * n[1].z; + r[2].y = m[0].y * n[2].x + m[1].y * n[2].y + m[2].y * n[2].z; + + r[0].z = m[0].z * n[0].x + m[1].z * n[0].y + m[2].z * n[0].z; + r[1].z = m[0].z * n[1].x + m[1].z * n[1].y + m[2].z * n[1].z; + r[2].z = m[0].z * n[2].x + m[1].z * n[2].y + m[2].z * n[2].z; + + This is a non-componentwise GLSL built-in - + arguments are two matrices (list (rows) of lists (cols)) + ex. arg[0] = [[d, e, f], [g, h, i], [j, k, l]]; m[0] == [d, g, j] + Arguments are converted to BigFloat type. + Return value is a flattened matrix in the form of a list of BigFloat values + equal in count to the number of rows in matrix m times the number of + columns in matrix n, ie. [[r0c0, r0c1, r0c2], [r1c0, r1c1, r1c2], ... ]. + + arguments: + args -- list of all arguments + args[0] -- m + args[1] -- n + """ + m = args[0] + n = args[1] + m_type = glsl_type_of(m) + n_type = glsl_type_of(n) + ret = [] + for i in range(m_type.num_rows): + for j in range(n_type.num_cols): + n_col = [] + for k in range(n_type.num_rows): + n_col.append(n[k][j]) + ret.append(_dot_ref([m[i],n_col])[0]) + return ret + +def _capture_error(precise, imprecise): + """Perform the legwork of calculating the difference in error of the high + precision and low precision runs. Decide whether this difference in error + is within allowable tolerances. The range of allowable tolerances is + subjective, as ARB_shader_precision (and GLSL spec as of v4.5) gives no + direct guidance for complex functions. Toronto, et. al. use quadrupled + error as a limit in "Practically Accurate Floating-Point Math," Computing + Now, Oct. 2014. Also use the difference in error and the value of one ulp + at the output to calculate the tolerance range in ulps for use by the + shader test, should this vector pass the badlands check. + """ + + errors = [] + badlands = [] + component_tolerances = [] + with high_precision: + error = bigfloat.abs(precise - imprecise) + errors.append(error) + with low_precision: + ulpsz = _ulpsize(imprecise) + with high_precision: + badlands.append(error > ulpsz * allowed_error_scale) + component_tolerances.append(bigfloat.round(error / ulpsz)) + return errors, badlands, component_tolerances + +def _analyze_ref_fn(fn, args): + """Many functions contain ill-conditioned regions referred to as "badlands" + (see Toronto, et. al., "Practically Accurate Floating-Point Math," + Computing Now, Oct. 2014). Within these regions errors in the inputs are + magnified significantly, making the function impossible to test with any + reasonable accuracy. A complex function that operates on floating point + numbers has the potential to generate such error propagation even if the + inputs are exact floating point numbers, since intermediate results can be + generated with error. In order to identify and avoid these areas, we run + the function once at a lower precision and once at a higher precision, and + compare the outputs. Propagating errors will be greater at lower precision + and less at higher precision for a given set of function inputs, allowing + us to identify the badlands of the function. + """ + + errors = [] + badlands = [] + component_tolerances = [] + with high_precision: + precise = fn(args) + with low_precision: + imprecise = fn(args) + for i, arg in enumerate(imprecise): + cur_errors, cur_badlands, cur_component_tolerances = _capture_error(precise[i], arg) + errors.extend(cur_errors) + badlands.extend(cur_badlands) + component_tolerances.extend(cur_component_tolerances) + return errors, badlands, component_tolerances + + +# These three dicts (simple_fns, complex_fns and mult_fns) divide the names of +# the glsl built-in operations into three mutually exclusive groups. They do +# not need to cover all built-ins however, as a default tolerance is assigned +# for names not appearing in these dicts. +# +# simple_fns are accompanied by a single blanket tolerance value used for all +# calculations (these come from the EXT). +# +# complex_fns are accompanied by a reference to a function that can be used to +# calculate a list of non-uniform tolerances. +# +# the two mult_fns (mult and assign-mult) are special in that they can act like +# a simple function when multiplying floats or vec members together in a +# componentwise manner, or they can act like complex functions when matrices +# are involved and they switch to matrix multiplication rules. +simple_fns = {'op-div': 2.5, + 'op-assign-div': 2.5, + 'pow': 16.0, + 'exp': 3.0, + 'exp2': 3.0, + 'log': 3.0, + 'log2': 3.0, + 'sqrt': 3.0, + 'inversesqrt': 2.0} + +complex_fns = {'mod': _mod_ref, + 'smoothstep': _smoothstep_ref, + 'mix': _mix_ref, + 'length': _length_ref, + 'distance': _distance_ref, + 'dot': _dot_ref, + 'cross': _cross_ref, + 'normalize': _normalize_ref, + 'faceforward': _faceforward_ref, + 'reflect': _reflect_ref, + 'refract': _refract_ref} + +mult_fns = {'op-mult': 0.0, + 'op-assign-mult': 0.0} + +# the complex functions listed here operate in a componentwise manner - the rest do not +componentwise = ('mod', 'mix', 'smoothstep' ) + +def _gen_tolerance(name, rettype, args): + """Return the tolerance that should be allowed for a function for the + test vector passed in. Return -1 for any vectors that would push the + tolerance outside of acceptable bounds + """ + if name in simple_fns: + return simple_fns[name] + elif name in complex_fns: + if name in componentwise: + # for componentwise functions, call the reference function + # for each component (scalar components multiplex here) + # eg. for fn(float a, vec2 b, vec2 c) call: + # _fn_ref(a, b[0], c[0]) + # _fn_ref(a, b[1], c[1]) + # and then capture the results in a list + errors = [] + badlands = [] + component_tolerances = [] + for component in range(rettype.num_cols * rettype.num_rows): + current_args = [] + for arg in args: + # sanitize args so that all analyze operations can be performed on a list + sanitized_arg = _listify(arg) + current_args.append(sanitized_arg[component % len(sanitized_arg)]) + cur_errors, cur_badlands, cur_component_tolerances = _analyze_ref_fn(complex_fns[name], current_args) + errors.extend(cur_errors) + badlands.extend(cur_badlands) + component_tolerances.extend(cur_component_tolerances) + else: + # for non-componentwise functions, all args must be passed in in a single run + # sanitize args so that all analyze operations can be performed on a list + current_args = map(_listify, args) + errors, badlands, component_tolerances = _analyze_ref_fn(complex_fns[name], current_args) + # results are in a list, and may have one or more members + return -1.0 if True in badlands else map(float, component_tolerances) + elif name in mult_fns: + x_type = glsl_type_of(args[0]) + y_type = glsl_type_of(args[1]) + # if matrix types are involved, the multiplication will be matrix multiplication + # and we will have to pass the args through a reference function + if x_type.is_vector and y_type.is_matrix: + mult_func = _vec_times_mat_ref + elif x_type.is_matrix and y_type.is_vector: + mult_func = _mat_times_vec_ref + elif x_type.is_matrix and y_type.is_matrix: + mult_func = _mat_times_mat_ref + else: + # float*float, float*vec, vec*float or vec*vec are all done as + # normal multiplication and share a single tolerance + return mult_fns[name] + # sanitize args so that all analyze operations can be performed on a list + errors, badlands, component_tolerances = _analyze_ref_fn(mult_func, _listify(args)) + # results are in a list, and may have one or more members + return -1.0 if True in badlands else map(float, component_tolerances) + else: + # default for any operations without specified tolerances + return 0.0 + def make_indexers(signature): """Build a list of strings which index into every possible value of the result. For example, if the result is a vec2, @@ -116,7 +691,7 @@ def shader_runner_format(values): retval = '' for x in values: assert isinstance(x, (float, np.float32)) - retval+=' {0:1.8e}'.format(x) + retval += ' {0:1.8e}'.format(x) else: assert isinstance(values, (float, np.float32)) retval = '{0:1.8e}'.format(values) @@ -126,14 +701,14 @@ def shader_runner_format(values): def main(): """ Main function """ - + for signature, test_vectors in sorted(six.iteritems(test_suite)): arg_float_check = tuple( arg.base_type == glsl_float for arg in signature.argtypes) # Filter the test vectors down to only those which deal exclusively in float types #and are not trig functions or determinant() indexers = make_indexers(signature) - num_elements = signature.rettype.num_cols*signature.rettype.num_rows + num_elements = signature.rettype.num_cols * signature.rettype.num_rows invocation = signature.template.format( *['arg{0}'.format(i) for i in range(len(signature.argtypes))]) if (signature.rettype.base_type == glsl_float and @@ -155,7 +730,7 @@ def main(): with open(output_filename, 'w') as f: f.write(template.render_unicode( signature=signature, test_vectors=test_vectors, - tolerances=tolerances, + tolerances=simple_fns, invocation=invocation, num_elements=num_elements, indexers=indexers, -- 2.2.2 _______________________________________________ Piglit mailing list Piglit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/piglit