On Sun, Dec 20, 2015 at 4:04 PM, Ganesh Ajjanagadde <gajjanaga...@gmail.com> wrote: > On Sun, Dec 20, 2015 at 12:04 PM, Ganesh Ajjanagadde > <gajjanaga...@gmail.com> wrote: >> Source code is from Boost: >> http://www.boost.org/doc/libs/1_46_1/boost/math/special_functions/erf.hpp >> with appropriate modifications for FFmpeg. >> >> Tested on interval -6 to 6 (beyond which it saturates), +/-NAN, +/-INFINITY >> under -fsanitize=undefined on clang to test for possible undefined behavior. >> >> (Below will be trimmed for actual commit, given for completeness here) >> >> Note that this will remove the erf dependency for dynaudnorm, making it >> universally available for clients. I have not done this in this patch; >> may be easily done once this is tested on a broken libm and reviewed. >> >> This function turns out to actually be more accurate and faster than the >> libm (GNU/BSD's/Mac OS X), and I can think of 3 reasons why upstream >> does not use this: >> 1. They are not aware of it. >> 2. They are concerned about licensing - this applies especially to GNU >> libm. >> 3. They do not know and/or appreciate the benefits of rational >> approximations over polynomial approximations. Boost uses them to great >> effect, see e.g swr/resample for bessel derived from them, which is also >> similarly superior to libm variants. >> >> First, performance. >> sample benchmark (clang -O3, Haswell, GNU/Linux): >> >> 3e8 values evenly spaced from 0 to 6 >> time (libm): >> ./test 13.39s user 0.00s system 100% cpu 13.376 total >> time (boost based): >> ./test 9.20s user 0.00s system 100% cpu 9.190 total >> >> Second, accuracy. >> 1e8 eval pts from 0 to 6 >> maxdiff (absolute): 2.2204460492503131e-16 >> illustration: >> erf(0.6): >> libm : 0.60385609084792602 >> boost : 0.60385609084792591 >> real : 0.60385609084792591 >> >> i.e libm is actually incorrectly rounded. Note that this is clear from: >> https://github.com/JuliaLang/openlibm/blob/master/src/s_erf.c (the Sun >> implementation used by both BSD and GNU libm's), where only 1 ulp is >> guaranteed. >> I suspect Boost's is correctly rounded (0.5 ulp) based on their error >> analysis but have not checked or verified formally that this is the >> case. >> >> Reviewed-by: James Almer <jamr...@gmail.com> >> Signed-off-by: Ganesh Ajjanagadde <gajjanaga...@gmail.com> >> --- >> configure | 1 - >> libavutil/libm.h | 198 >> +++++++++++++++++++++++++++++++++++++++++++++++++++++++ >> 2 files changed, 198 insertions(+), 1 deletion(-) >> >> diff --git a/configure b/configure >> index da74616..89c98d1 100755 >> --- a/configure >> +++ b/configure >> @@ -2851,7 +2851,6 @@ cropdetect_filter_deps="gpl" >> delogo_filter_deps="gpl" >> deshake_filter_select="pixelutils" >> drawtext_filter_deps="libfreetype" >> -dynaudnorm_filter_deps="erf" >> ebur128_filter_deps="gpl" >> eq_filter_deps="gpl" >> fftfilt_filter_deps="avcodec" >> diff --git a/libavutil/libm.h b/libavutil/libm.h >> index 37b8e86..f849bbb 100644 >> --- a/libavutil/libm.h >> +++ b/libavutil/libm.h >> @@ -1,4 +1,5 @@ >> /* >> + * erf function: Copyright (c) 2006 John Maddock >> * This file is part of FFmpeg. >> * >> * FFmpeg is free software; you can redistribute it and/or >> @@ -76,6 +77,203 @@ static av_always_inline double copysign(double x, double >> y) >> #define cosf(x) ((float)cos(x)) >> #endif >> >> +#if !HAVE_ERF >> +static inline double ff_eval_poly(const double *coeff, int size, double x) { >> + double sum = coeff[size-1]; >> + int i; >> + for (i = size-2; i >= 0; --i) { >> + sum *= x; >> + sum += coeff[i]; >> + } >> + return sum; >> +} >> + >> +/** >> + * erf function >> + * Algorithm taken from the Boost project, source: >> + * http://www.boost.org/doc/libs/1_46_1/boost/math/special_functions/erf.hpp >> + * Use, modification and distribution are subject to the >> + * Boost Software License, Version 1.0 (see notice below). >> + * Boost Software License - Version 1.0 - August 17th, 2003 >> +Permission is hereby granted, free of charge, to any person or organization >> +obtaining a copy of the software and accompanying documentation covered by >> +this license (the "Software") to use, reproduce, display, distribute, >> +execute, and transmit the Software, and to prepare derivative works of the >> +Software, and to permit third-parties to whom the Software is furnished to >> +do so, all subject to the following: >> + >> +The copyright notices in the Software and this entire statement, including >> +the above license grant, this restriction and the following disclaimer, >> +must be included in all copies of the Software, in whole or in part, and >> +all derivative works of the Software, unless such copies or derivative >> +works are solely in the form of machine-executable object code generated by >> +a source language processor. >> + >> +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR >> +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT >> +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE >> +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, >> +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >> +DEALINGS IN THE SOFTWARE. >> + */ >> +static inline double erf(double z) >> +{ >> + double result; >> + >> + /* handle the symmetry: erf(-x) = -erf(x) */ >> + if (z < 0) >> + return -erf(-z); >> + >> + /* branch based on range of z, and pick appropriate approximation */ >> + if (z == 0) >> + return 0; >> + else if (z < 1e-10) >> + return z * 1.125 + z * 0.003379167095512573896158903121545171688; >> + else if (z < 0.5) { >> + // Maximum Deviation Found: 1.561e-17 >> + // Expected Error Term: 1.561e-17 >> + // Maximum Relative Change in Control Points: 1.155e-04 >> + // Max Error found at double precision = 2.961182e-17 >> + >> + static const double y = 1.044948577880859375; >> + static const double p[] = { >> + 0.0834305892146531832907, >> + -0.338165134459360935041, >> + -0.0509990735146777432841, >> + -0.00772758345802133288487, >> + -0.000322780120964605683831, >> + }; >> + static const double q[] = { >> + 1, >> + 0.455004033050794024546, >> + 0.0875222600142252549554, >> + 0.00858571925074406212772, >> + 0.000370900071787748000569, >> + }; >> + double zz = z * z; >> + return z * (y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), zz) / >> ff_eval_poly(q, FF_ARRAY_ELEMS(q), zz)); >> + } >> + /* here onwards compute erfc */ >> + else if (z < 1.5) { >> + // Maximum Deviation Found: 3.702e-17 >> + // Expected Error Term: 3.702e-17 >> + // Maximum Relative Change in Control Points: 2.845e-04 >> + // Max Error found at double precision = 4.841816e-17 >> + static const double y = 0.405935764312744140625; >> + static const double p[] = { >> + -0.098090592216281240205, >> + 0.178114665841120341155, >> + 0.191003695796775433986, >> + 0.0888900368967884466578, >> + 0.0195049001251218801359, >> + 0.00180424538297014223957, >> + }; >> + static const double q[] = { >> + 1, >> + 1.84759070983002217845, >> + 1.42628004845511324508, >> + 0.578052804889902404909, >> + 0.12385097467900864233, >> + 0.0113385233577001411017, >> + 0.337511472483094676155e-5, >> + }; >> + result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), z - 0.5) / >> ff_eval_poly(q, FF_ARRAY_ELEMS(q), z - 0.5); >> + result *= exp(-z * z) / z; >> + return 1 - result; >> + } >> + else if (z < 2.5) { >> + // Max Error found at double precision = 6.599585e-18 >> + // Maximum Deviation Found: 3.909e-18 >> + // Expected Error Term: 3.909e-18 >> + // Maximum Relative Change in Control Points: 9.886e-05 >> + static const double y = 0.50672817230224609375; >> + static const double p[] = { >> + -0.0243500476207698441272, >> + 0.0386540375035707201728, >> + 0.04394818964209516296, >> + 0.0175679436311802092299, >> + 0.00323962406290842133584, >> + 0.000235839115596880717416, >> + }; >> + static const double q[] = { >> + 1, >> + 1.53991494948552447182, >> + 0.982403709157920235114, >> + 0.325732924782444448493, >> + 0.0563921837420478160373, >> + 0.00410369723978904575884, >> + }; >> + result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), z - 1.5) / >> ff_eval_poly(q, FF_ARRAY_ELEMS(q), z - 1.5); >> + result *= exp(-z * z) / z; >> + return 1 - result; >> + } >> + else if (z < 4.5) { >> + // Maximum Deviation Found: 1.512e-17 >> + // Expected Error Term: 1.512e-17 >> + // Maximum Relative Change in Control Points: 2.222e-04 >> + // Max Error found at double precision = 2.062515e-17 >> + static const double y = 0.5405750274658203125; >> + static const double p[] = { >> + 0.00295276716530971662634, >> + 0.0137384425896355332126, >> + 0.00840807615555585383007, >> + 0.00212825620914618649141, >> + 0.000250269961544794627958, >> + 0.113212406648847561139e-4, >> + }; >> + static const double q[] = { >> + 1, >> + 1.04217814166938418171, >> + 0.442597659481563127003, >> + 0.0958492726301061423444, >> + 0.0105982906484876531489, >> + 0.000479411269521714493907, >> + }; >> + result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), z - 3.5) / >> ff_eval_poly(q, FF_ARRAY_ELEMS(q), z - 3.5); >> + result *= exp(-z * z) / z; >> + return 1 - result; >> + } >> + /* differ from Boost here, the claim of underflow of erfc(x) past 5.8 is >> + * slightly incorrect, change to 5.92 >> + * (really somewhere between 5.9125 and 5.925 is when it saturates) */ >> + else if (z < 5.92) { >> + // Max Error found at double precision = 2.997958e-17 >> + // Maximum Deviation Found: 2.860e-17 >> + // Expected Error Term: 2.859e-17 >> + // Maximum Relative Change in Control Points: 1.357e-05 >> + static const double y = 0.5579090118408203125; >> + static const double p[] = { >> + 0.00628057170626964891937, >> + 0.0175389834052493308818, >> + -0.212652252872804219852, >> + -0.687717681153649930619, >> + -2.5518551727311523996, >> + -3.22729451764143718517, >> + -2.8175401114513378771, >> + }; >> + static const double q[] = { >> + 1, >> + 2.79257750980575282228, >> + 11.0567237927800161565, >> + 15.930646027911794143, >> + 22.9367376522880577224, >> + 13.5064170191802889145, >> + 5.48409182238641741584, >> + }; >> + result = y + ff_eval_poly(p, FF_ARRAY_ELEMS(p), 1 / z) / >> ff_eval_poly(q, FF_ARRAY_ELEMS(q), 1 / z); >> + result *= exp(-z * z) / z; >> + return 1 - result; >> + } >> + /* handle the nan case, but don't use isnan for max portability */ >> + else if (z != z) >> + return z; >> + /* finally return saturated result */ >> + else >> + return 1; >> +} >> +#endif >> + >> #if !HAVE_EXPF >> #undef expf >> #define expf(x) ((float)exp(x)) >> -- >> 2.6.4 >> [...]
Pushed with slight modifications. This is a request to any running MSVC/other platform lacking erf: can you please test to make sure this works? _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel