Hello community, here is the log from the commit of package zita-convolver for openSUSE:Factory checked in at 2018-06-19 12:00:16 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/zita-convolver (Old) and /work/SRC/openSUSE:Factory/.zita-convolver.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "zita-convolver" Tue Jun 19 12:00:16 2018 rev:2 rq:616633 version:4.0.0 Changes: -------- --- /work/SRC/openSUSE:Factory/zita-convolver/zita-convolver.changes 2018-05-29 10:33:31.981714684 +0200 +++ /work/SRC/openSUSE:Factory/.zita-convolver.new/zita-convolver.changes 2018-06-19 12:00:27.673165815 +0200 @@ -1,0 +2,32 @@ +Wed Jun 13 22:11:05 UTC 2018 - [email protected] + +- Add baselibs.conf +- Update to version 4.0.0 + 1. Version 4 now uses int types from <stdint.h> internally and for + the arguments of all member functions. + 2. The set_density() function has been removed, and the matrix + density hint is now an additional argument to configure(). + + If your application does not use impdata_update(), then (1) + and (2) are the only relevant changes. + + 3. The way impdata_update() works has changed. + This function is used to modify IR data while the convolver is + actually running. It does not use any memory allocation nor + modify internal data structures, and only data in already + existing partitions can be modified this way. + In versions <= 3, this function would *overwrite* any existing + data. From version 4, impdata_update() *adds* to existing IR + data, just as impdata_create() does. So in order to replace an + existing IR, you first need to clear it using the new function + impdata_clear(). This will clear (but not delete) all IR data + for a given input, output pair. + +------------------------------------------------------------------- +Thu Jun 7 21:12:24 UTC 2018 - [email protected] + +- Replace %soname by %sover to better reflect its use. +- Drop bias from description. +- Add Group: line for shared library subpackage. 
+ +------------------------------------------------------------------- Old: ---- zita-convolver-3.1.0.tar.bz2 New: ---- baselibs.conf zita-convolver-4.0.0.tar.bz2 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ zita-convolver.spec ++++++ --- /var/tmp/diff_new_pack.6FGn2A/_old 2018-06-19 12:00:29.957081014 +0200 +++ /var/tmp/diff_new_pack.6FGn2A/_new 2018-06-19 12:00:29.985079975 +0200 @@ -16,15 +16,16 @@ # -%define soname 3 +%define sover 4 Name: zita-convolver -Version: 3.1.0 +Version: 4.0.0 Release: 0 -Summary: A fast, partitioned convolution engine library -License: GPL-3.0+ +Summary: A partitioned convolution engine library +License: GPL-3.0-or-later Group: Development/Libraries/C and C++ URL: https://kokkinizita.linuxaudio.org/linuxaudio/ Source: https://kokkinizita.linuxaudio.org/linuxaudio/downloads/%{name}-%{version}.tar.bz2 +Source99: baselibs.conf BuildRequires: gcc-c++ BuildRequires: pkgconfig BuildRequires: pkgconfig(fftw3f) @@ -43,10 +44,11 @@ No CPU cycles or memory resources are wasted on empty cells in the matrix, nor on empty partitions if IRs are of different length. -%package -n lib%{name}%{soname} -Summary: A fast, partitioned convolution engine library +%package -n lib%{name}%{sover} +Summary: A partitioned convolution engine library +Group: System/Libraries -%description -n lib%{name}%{soname} +%description -n lib%{name}%{sover} Convolution engine based on FFT convolution and using non-uniform partition sizes: small ones at the start of the IR and building up to the most efficient size further on. It can perform zero-delay processing with moderate CPU load. 
@@ -62,11 +64,12 @@ %package -n %{name}-devel Summary: Development files for zita-convolver -Requires: lib%{name}%{soname} = %{version} +Group: Development/Libraries/C and C++ +Requires: lib%{name}%{sover} = %{version} Requires: pkgconfig(fftw3f) %description -n %{name}-devel -Development package for zita-convolver, a fast, partitioned convolution engine +Development package for zita-convolver, a partitioned convolution engine library. %prep @@ -79,10 +82,10 @@ %install %make_install -C libs PREFIX=%{_prefix} LIBDIR=%{_lib} -%post -n lib%{name}%{soname} -p /sbin/ldconfig -%postun -n lib%{name}%{soname} -p /sbin/ldconfig +%post -n lib%{name}%{sover} -p /sbin/ldconfig +%postun -n lib%{name}%{sover} -p /sbin/ldconfig -%files -n lib%{name}%{soname} +%files -n lib%{name}%{sover} %license COPYING %{_libdir}/lib%{name}.so.* ++++++ baselibs.conf ++++++ libzita-convolver4 ++++++ zita-convolver-3.1.0.tar.bz2 -> zita-convolver-4.0.0.tar.bz2 ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/zita-convolver-3.1.0/INSTALL new/zita-convolver-4.0.0/INSTALL --- old/zita-convolver-3.1.0/INSTALL 2011-09-25 18:59:08.000000000 +0200 +++ new/zita-convolver-4.0.0/INSTALL 2018-06-06 17:33:28.000000000 +0200 @@ -11,9 +11,9 @@ ---------------------- To make and install the library, cd to the libs directory, -su root, and 'make install'. This will install the lib and -header file in /usr/lib and /usr/include. To install to -other directories, see the Makefile. After the install -you can do a 'make clean' to return the libs directory +'make', and as root, 'make install'. This will install the +lib and header file in /usr/local/lib and /usr/local/include. +To install to other directories, see the Makefile. After the +install you can do a 'make clean' to return the libs directory to its original state. 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/zita-convolver-3.1.0/README new/zita-convolver-4.0.0/README --- old/zita-convolver-3.1.0/README 2011-10-12 22:48:01.000000000 +0200 +++ new/zita-convolver-4.0.0/README 2018-06-06 17:31:18.000000000 +0200 @@ -1,32 +1,54 @@ ************************** -* zita-convolver 3.0.1 * +* zita-convolver 4.0.0 * +* Released 2018-06-06 * ************************** - -API changes between 2.x.x and 3.x.x +API changes between 3.x.x and 4.x.x ------------------------------------ -Release 3 will be fully documented, but this could take some time. -Until then, the following should enable you to modify your apps so -they can be compiled and will work well with release 3. +1. Version 4 now uses int types from <stdint.h> + internally and for the arguments of all member + funcions. + +2. The set_density() function has been removed, and the + matrix density hint is now an additional argument to + configure(). + +If your application does not use impdata_update(), +then (1) and (2) are the only relevant changes. + +3. The way impdata_update() works has changed. + +This function is used to modify IR data while the convolver +is actually running. It does not use any memory allocation +nor modify internal data structures, and only data in already +existing partitions can be modified this way. +In versions <= 3, this function would *overwrite* any existing +data. From version 4, impdata_update() *adds* to existing IR +data, just as impdata_create() does. So in order to replace an +existing IR, you first need to clear it using the new function +impdata_clear(). This will clear (but not delete) all IR data +for a given input, output pair. -1. Version test macro and function. 
+Version test macro and function +------------------------------- If your application depends on this version of zita-convolver, insert the following two code fragments, normally in your main program source file: +This will test for zita-convolver-4.x.x at compile time. ----- #include <zita-convolver.h> -#if ZITA_CONVOLVER_MAJOR_VERSION != 3 -#error "This programs requires zita-convolver 3.x.x" +#if ZITA_CONVOLVER_MAJOR_VERSION != 4 +#error "This program requires zita-convolver 4.x.x" #endif ----- -This will test for zita-convolver-3.x.x at compile time. - +This will check that the compile time and run time libraries +are compatible. ----- if (zita_convolver_major_version () != ZITA_CONVOLVER_MAJOR_VERSION) { @@ -35,126 +57,3 @@ } ----- -This will check that the compile time and run time libraries -are compatible. - -One of the problems with release 2 was that almost any change -would lead to incompatibility with previous versions. The code -in release 3 has been extensively reworked to ensure that small -bugfixes and minor changes will not lead to binary or source -incompatibility. - - -2. Testing run-time flags. - -Release 2 had Convproc::flags(), providing information about -computation threads being late and CPU overload. This function -is no longer available. Convproc::process(), previously a void -function, now returns the same information. The calling thread -should take care of communicating this to any non-realtime context -that may need it. This may involve some code to logically OR bits -with provious values, and to clear this accumulated status when -it is read by e.g. the main event loop. Another way to check for -CPU overload is to monitor Convproc::state(). - - -3. Setting options. - -The functions set_fftwopt() and set_vectopt() are no longer -available. They are replaced by set_options() which takes the -logical OR of some constant as its argument. 
The possible -values to be ORed are: - - Convproc::OPT_FFTW_MEASURE - Convproc::OPT_VECTOR_MODE - -The first one will make the fftw3 library run some test code -when FFT plans are created. This will improve performance but -lead to a slow-starting application. The second enables some -experimental vector code which can improve performance for -some configurations. Convproc::set_options() must be called -*before* Convproc::configure(), it will be ignored otherwise. - - -4. The 'density' parameter. - -While release 2 used an almost fixed sequence of partition sizes, -release 3 has some new code to optimize this in function of the -number of inputs and outputs, the maximum impulse response length, -and the type of convolution matrix. The 'density' parameter is set -by calling Convproc::set_density() *before* Convproc::configure(). -The value of 'density' should be the fraction of input / output -combinations that actually will be used. When set to zero (or not -set at all), the default value will be 1 / min (Ninp, Nout), which -will work well in many cases, except when you have a dense matrix, -one in which (almost) all outputs depend on (almost) all inputs. -For a fully filled matrix 'density' should be set to 1. - - -5. The 'sync' argument to Convproc::process(). - -Convproc::process() now takes a optional boolean argument 'sync'. -The default value is 'false'. When set to 'true' it makes the -process() call wait for data from auxiliary threads instead of -assuming that these threads have completed their work and that -the data they should provide is available. - -Zita-convolver can be used in three modes: - - A. Batch mode. Use this to compute convolutions of finite - lenght in non-realtime mode. No auxiliary threads are used - in this mode, and there is no need to check the 'late' flags - returned by Convproc::process(). 
To run a convolver in batch - mode ensure that all three of the 'quantum', 'minpart' and - 'maxpart' arguments to Convproc::configure() are identical. - Doing this does not exclude real-time operation, it just - ensures that all processing will be done in a single thread, - that of the caller of process(). The 'sync' argument is - ignored in this case. - - B. Real-time mode. The normal way to call process() in a - Jack process callback is to set 'sync' to false. In this - case lower-priority threads, if there are any, are assumed - to have finished their work in time, process() will not wait - for them and will never block. - - C. Freewheeling mode. When 'sync' is set to true, process() - will wait at appropiate times for lower priority threads to - have finished. This *must* be done when Jack is running in - 'freewheeling' mode. In theory you could set 'sync' true in - normal Jack mode as well - if all is OK the lower priority - threads should be in time. But it could in theory block - the caller for a long time, so this is not recommended. - - -6. The Convproc::start_process() function. - -As is version 2.0 this call takes two arguments, a thread -priority and a thread scheduling class. In 2.0 the second -defaulted to SCHED_FIFO. In version 3.0 you have to supply -both arguments. If you are using a Convproc in 'batch mode' -(see above) just use zero for both. In all other cases, the -priority and scheduling class values _must_ be those of the -thread that will be calling Convproc::process(), and the -scheduling class _must_ be a real-time one (FIFO or RR). -If this is not observed things may appear to work but will -fail sooner or later. - - -7. Cleaning up. - -As before, Convproc::stop_process() is used to terminate -processing, and Convproc::cleanup() will free all internal -buffers and return a Convproc to its initial state as after -construction. The destructor calls cleanup() as well. 
-Clearly this must not be done before all auxiliary threads -have terminated. Convproc::cleanup() now checks for this, -and will eventually block for a short time before proceeding. -If you want to avoid this, you can explicitly wait in your -own code, e.g 'while (! C->check_stop()) usleep (100000);', -or you could use a periodic event calling check_stop() in -your main loop. - - - - \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/zita-convolver-3.1.0/libs/Makefile new/zita-convolver-4.0.0/libs/Makefile --- old/zita-convolver-3.1.0/libs/Makefile 2011-11-20 22:30:54.000000000 +0100 +++ new/zita-convolver-4.0.0/libs/Makefile 2018-05-27 15:46:18.000000000 +0200 @@ -1,6 +1,6 @@ # ------------------------------------------------------------------------ # -# Copyright (C) 2006-2011 Fons Adriaensen <[email protected]> +# Copyright (C) 2006-2018 Fons Adriaensen <[email protected]> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published @@ -25,12 +25,13 @@ LIBDIR = lib$(SUFFIX) -MAJVERS = 3 -MINVERS = 1.0 +MAJVERS = 4 +MINVERS = 0.0 VERSION = $(MAJVERS).$(MINVERS) -CPPFLAGS += -I. -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -DENABLE_VECTOR_MODE +CPPFLAGS += -I. 
-D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS +CPPFLAGS += -DENABLE_VECTOR_MODE CXXFLAGS += -fPIC -Wall CXXFLAGS += -ffast-math -funroll-loops -O3 CXXFLAGS += -march=native diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/zita-convolver-3.1.0/libs/Makefile-linux new/zita-convolver-4.0.0/libs/Makefile-linux --- old/zita-convolver-3.1.0/libs/Makefile-linux 2011-11-20 22:30:37.000000000 +0100 +++ new/zita-convolver-4.0.0/libs/Makefile-linux 2018-05-27 15:46:18.000000000 +0200 @@ -1,6 +1,6 @@ # ------------------------------------------------------------------------ # -# Copyright (C) 2006-2011 Fons Adriaensen <[email protected]> +# Copyright (C) 2006-2018 Fons Adriaensen <[email protected]> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published @@ -25,12 +25,13 @@ LIBDIR = lib$(SUFFIX) -MAJVERS = 3 -MINVERS = 1.0 +MAJVERS = 4 +MINVERS = 0.0 VERSION = $(MAJVERS).$(MINVERS) -CPPFLAGS += -I. -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -DENABLE_VECTOR_MODE +CPPFLAGS += -I. 
-D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS +CPPFLAGS += -DENABLE_VECTOR_MODE CXXFLAGS += -fPIC -Wall CXXFLAGS += -ffast-math -funroll-loops -O3 CXXFLAGS += -march=native diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/zita-convolver-3.1.0/libs/Makefile-osx new/zita-convolver-4.0.0/libs/Makefile-osx --- old/zita-convolver-3.1.0/libs/Makefile-osx 2011-12-05 20:37:11.000000000 +0100 +++ new/zita-convolver-4.0.0/libs/Makefile-osx 2018-05-27 15:46:37.000000000 +0200 @@ -1,6 +1,6 @@ # ------------------------------------------------------------------------ # -# Copyright (C) 2006-2011 Fons Adriaensen <[email protected]> +# Copyright (C) 2006-2018 Fons Adriaensen <[email protected]> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published @@ -30,8 +30,8 @@ LIBDIR = lib$(SUFFIX) -MAJVERS = 3 -MINVERS = 1.0 +MAJVERS = 4 +MINVERS = 0.0 VERSION = $(MAJVERS).$(MINVERS) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/zita-convolver-3.1.0/libs/zita-convolver.cc new/zita-convolver-4.0.0/libs/zita-convolver.cc --- old/zita-convolver-3.1.0/libs/zita-convolver.cc 2011-11-20 22:16:49.000000000 +0100 +++ new/zita-convolver-4.0.0/libs/zita-convolver.cc 2018-06-01 13:42:59.000000000 +0200 @@ -1,6 +1,6 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2011 Fons Adriaensen <[email protected]> +// Copyright (C) 2006-2018 Fons Adriaensen <[email protected]> // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -42,11 +42,27 @@ float Convproc::_fft_cost = 5.0f; +static float *calloc_real (uint32_t k) +{ + float *p = fftwf_alloc_real (k); + if (!p) throw (Converror (Converror::MEM_ALLOC)); + memset (p, 0, k * sizeof (float)); + return p; +} + +static fftwf_complex 
*calloc_complex (uint32_t k) +{ + fftwf_complex *p = fftwf_alloc_complex (k); + if (!p) throw (Converror (Converror::MEM_ALLOC)); + memset (p, 0, k * sizeof (fftwf_complex)); + return p; +} + + Convproc::Convproc (void) : _state (ST_IDLE), _options (0), _skipcnt (0), - _density (0), _ninp (0), _nout (0), _quantum (0), @@ -63,41 +79,39 @@ Convproc::~Convproc (void) { + stop_process (); cleanup (); } -void Convproc::set_options (unsigned int options) +void Convproc::set_options (uint32_t options) { _options = options; } -void Convproc::set_density (float density) -{ - _density = density; -} - - -void Convproc::set_skipcnt (unsigned int skipcnt) +void Convproc::set_skipcnt (uint32_t skipcnt) { if ((_quantum == _minpart) && (_quantum == _maxpart)) _skipcnt = skipcnt; } -int Convproc::configure (unsigned int ninp, - unsigned int nout, - unsigned int maxsize, - unsigned int quantum, - unsigned int minpart, - unsigned int maxpart) -{ - unsigned int offs, npar, size, pind, nmin, nmax, step, i; - int prio, d, r, s; - float cfft, cmac, t; +int Convproc::configure (uint32_t ninp, + uint32_t nout, + uint32_t maxsize, + uint32_t quantum, + uint32_t minpart, + uint32_t maxpart, + float density) +{ + uint32_t offs, npar, size, pind, nmin, i; + int prio, step, d, r, s; + float cfft, cmac; if (_state != ST_IDLE) return Converror::BAD_STATE; - if ( (quantum & (quantum - 1)) + if ( (ninp < 1) || (ninp > MAXINP) + || (nout < 1) || (nout > MAXOUT) + || (quantum & (quantum - 1)) || (quantum < MINQUANT) || (quantum > MAXQUANT) || (minpart & (minpart - 1)) @@ -108,21 +122,12 @@ || (maxpart > MAXPART) || (maxpart < minpart)) return Converror::BAD_PARAM; - if (ninp < nout) { nmin = ninp; nmax = nout; } - else { nmin = nout; nmax = ninp; } - - if (_density <= 0) _density = 1.0 / nmin; - else - { - t = 1.0f / nmax; - if (_density < t) _density = t; - if (_density > 1) _density = 1; - } - + nmin = (ninp < nout) ? 
ninp : nout; + if (density <= 0.0f) density = 1.0f / nmin; + if (density > 1.0f) density = 1.0f; cfft = _fft_cost * (ninp + nout); - cmac = _mac_cost * ninp * nout * _density; + cmac = _mac_cost * ninp * nout * density; step = (cfft < 4 * cmac) ? 1 : 2; - if (step == 2) { r = maxpart / minpart; @@ -131,7 +136,6 @@ else s = 1; nmin = (s == 1) ? 2 : 6; if (minpart == quantum) nmin++; - prio = 0; size = quantum; while (size < minpart) @@ -154,7 +158,6 @@ } _convlev [pind] = new Convlevel (); _convlev [pind]->configure (prio, offs, npar, size, _options); - offs += size * npar; if (offs < maxsize) { @@ -188,21 +191,22 @@ } -int Convproc::impdata_create (unsigned int inp, - unsigned int out, - unsigned int step, - float *data, - int ind0, - int ind1) +int Convproc::impdata_create (uint32_t inp, + uint32_t out, + int32_t step, + float *data, + int32_t ind0, + int32_t ind1) { - unsigned int j; + uint32_t j; if (_state != ST_STOP) return Converror::BAD_STATE; + if ((inp >= _ninp) || (out >= _nout)) return Converror::BAD_PARAM; try { for (j = 0; j < _nlevels; j++) { - _convlev [j]->impdata_create (inp, out, step, data, ind0, ind1); + _convlev [j]->impdata_write (inp, out, step, data, ind0, ind1, true); } } catch (...) 
@@ -214,37 +218,51 @@ } -int Convproc::impdata_update (unsigned int inp, - unsigned int out, - unsigned int step, - float *data, - int ind0, - int ind1) +int Convproc::impdata_clear (uint32_t inp, uint32_t out) +{ + uint32_t k; + + if (_state < ST_STOP) return Converror::BAD_STATE; + for (k = 0; k < _nlevels; k++) _convlev [k]->impdata_clear (inp, out); + return 0; +} + + +int Convproc::impdata_update (uint32_t inp, + uint32_t out, + int32_t step, + float *data, + int32_t ind0, + int32_t ind1) { - unsigned int j; + uint32_t j; if (_state < ST_STOP) return Converror::BAD_STATE; + if ((inp >= _ninp) || (out >= _nout)) return Converror::BAD_PARAM; for (j = 0; j < _nlevels; j++) { - _convlev [j]->impdata_update (inp, out, step, data, ind0, ind1); + _convlev [j]->impdata_write (inp, out, step, data, ind0, ind1, false); } return 0; } -int Convproc::impdata_copy (unsigned int inp1, - unsigned int out1, - unsigned int inp2, - unsigned int out2) +int Convproc::impdata_link (uint32_t inp1, + uint32_t out1, + uint32_t inp2, + uint32_t out2) { - unsigned int j; - + uint32_t j; + + if ((inp1 >= _ninp) || (out1 >= _nout)) return Converror::BAD_PARAM; + if ((inp2 >= _ninp) || (out2 >= _nout)) return Converror::BAD_PARAM; + if ((inp1 == inp2) && (out1 == out2)) return Converror::BAD_PARAM; if (_state != ST_STOP) return Converror::BAD_STATE; try { for (j = 0; j < _nlevels; j++) { - _convlev [j]->impdata_copy (inp1, out1, inp2, out2); + _convlev [j]->impdata_link (inp1, out1, inp2, out2); } } catch (...) @@ -258,7 +276,7 @@ int Convproc::reset (void) { - unsigned int k; + uint32_t k; if (_state == ST_IDLE) return Converror::BAD_STATE; for (k = 0; k < _ninp; k++) memset (_inpbuff [k], 0, _inpsize * sizeof (float)); @@ -270,17 +288,17 @@ int Convproc::start_process (int abspri, int policy) { - unsigned int k; + uint32_t k; if (_state != ST_STOP) return Converror::BAD_STATE; - _latecnt = 0; _inpoffs = 0; _outoffs = 0; reset (); + for (k = (_minpart == _quantum) ? 
1 : 0; k < _nlevels; k++) { - _convlev [k]->start (abspri, policy); + _convlev [k]->start (abspri, policy); } _state = ST_PROC; return 0; @@ -289,14 +307,12 @@ int Convproc::process (bool sync) { - unsigned int k; - int f = 0; + uint32_t k; + int f = 0; if (_state != ST_PROC) return 0; - _inpoffs += _quantum; if (_inpoffs == _inpsize) _inpoffs = 0; - _outoffs += _quantum; if (_outoffs == _minpart) { @@ -309,7 +325,7 @@ { if (++_latecnt >= 5) { - stop_process (); + if (~_options & OPT_LATE_CONTIN) stop_process (); f |= FL_LOAD; } } @@ -321,7 +337,7 @@ int Convproc::stop_process (void) { - unsigned int k; + uint32_t k; if (_state != ST_PROC) return Converror::BAD_STATE; for (k = 0; k < _nlevels; k++) _convlev [k]->stop (); @@ -332,17 +348,12 @@ int Convproc::cleanup (void) { - unsigned int k; + uint32_t k; while (! check_stop ()) { usleep (100000); } - if (_state != ST_STOP) - { - return Converror::BAD_STATE; - } - for (k = 0; k < _ninp; k++) { delete[] _inpbuff [k]; @@ -362,7 +373,6 @@ _state = ST_IDLE; _options = 0; _skipcnt = 0; - _density = 0; _ninp = 0; _nout = 0; _quantum = 0; @@ -376,7 +386,7 @@ bool Convproc::check_stop (void) { - unsigned int k; + uint32_t k; for (k = 0; (k < _nlevels) && (_convlev [k]->_stat == Convlevel::ST_IDLE); k++); if (k == _nlevels) @@ -390,7 +400,7 @@ void Convproc::print (FILE *F) { - unsigned int k; + uint32_t k; for (k = 0; k < _nlevels; k++) _convlev [k]->print (F); } @@ -424,21 +434,11 @@ } -void *Convlevel::alloc_aligned (size_t size) -{ - void *p; - - if (posix_memalign (&p, 16, size)) throw (Converror (Converror::MEM_ALLOC)); - memset (p, 0, size); - return p; -} - - -void Convlevel::configure (int prio, - unsigned int offs, - unsigned int npar, - unsigned int parsize, - unsigned int options) +void Convlevel::configure (int prio, + uint32_t offs, + uint32_t npar, + uint32_t parsize, + uint32_t options) { int fftwopt = (options & OPT_FFTW_MEASURE) ? 
FFTW_MEASURE : FFTW_ESTIMATE; @@ -448,9 +448,9 @@ _parsize = parsize; _options = options; - _time_data = (float *)(alloc_aligned (2 * _parsize * sizeof (float))); - _prep_data = (float *)(alloc_aligned (2 * _parsize * sizeof (float))); - _freq_data = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex))); + _time_data = calloc_real (2 * _parsize); + _prep_data = calloc_real (2 * _parsize); + _freq_data = calloc_complex (_parsize + 1); _plan_r2c = fftwf_plan_dft_r2c_1d (2 * _parsize, _time_data, _freq_data, fftwopt); _plan_c2r = fftwf_plan_dft_c2r_1d (2 * _parsize, _freq_data, _time_data, fftwopt); if (_plan_r2c && _plan_c2r) return; @@ -458,42 +458,49 @@ } -void Convlevel::impdata_create (unsigned int inp, - unsigned int out, - unsigned int step, - float *data, - int i0, - int i1) -{ - unsigned int k; - int j, j0, j1, n; - float norm; - fftwf_complex *fftb; - Macnode *M; +void Convlevel::impdata_write (uint32_t inp, + uint32_t out, + int32_t step, + float *data, + int32_t i0, + int32_t i1, + bool create) +{ + uint32_t k; + int32_t j, j0, j1, n; + float norm; + fftwf_complex *fftb; + Macnode *M; n = i1 - i0; i0 = _offs - i0; i1 = i0 + _npar * _parsize; if ((i0 >= n) || (i1 <= 0)) return; - M = findmacnode (inp, out, true); - if (! (M->_fftb)) + if (create) { - M->_fftb = new fftwf_complex * [_npar]; - memset (M->_fftb, 0, _npar * sizeof (fftwf_complex *)); + M = findmacnode (inp, out, true); + if (M == 0 || M->_link) return; + if (M->_fftb == 0) M->alloc_fftb (_npar); } - + else + { + M = findmacnode (inp, out, false); + if (M == 0 || M->_link || M->_fftb == 0) return; + } + norm = 0.5f / _parsize; for (k = 0; k < _npar; k++) { i1 = i0 + _parsize; if ((i0 < n) && (i1 > 0)) { - if (! 
(M->_fftb [k])) - { - M->_fftb [k] = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex))); + fftb = M->_fftb [k]; + if (fftb == 0 && create) + { + M->_fftb [k] = fftb = calloc_complex (_parsize + 1); } - if (data) + if (fftb && data) { memset (_prep_data, 0, 2 * _parsize * sizeof (float)); j0 = (i0 < 0) ? 0 : i0; @@ -503,7 +510,6 @@ #ifdef ENABLE_VECTOR_MODE if (_options & OPT_VECTOR_MODE) fftswap (_freq_data); #endif - fftb = M->_fftb [k]; for (j = 0; j <= (int)_parsize; j++) { fftb [j][0] += _freq_data [j][0]; @@ -516,52 +522,27 @@ } -void Convlevel::impdata_update (unsigned int inp, - unsigned int out, - unsigned int step, - float *data, - int i0, - int i1) -{ - unsigned int k; - int j, j0, j1, n; - float norm; - fftwf_complex *fftb; - Macnode *M; +void Convlevel::impdata_clear (uint32_t inp, uint32_t out) +{ + uint32_t i; + Macnode *M; M = findmacnode (inp, out, false); - if (! M) return; - - n = i1 - i0; - i0 = _offs - i0; - i1 = i0 + _npar * _parsize; - if ((i0 >= n) || (i1 <= 0)) return; - - norm = 0.5f / _parsize; - for (k = 0; k < _npar; k++) + if (M == 0 || M->_link || M->_fftb == 0) return; + for (i = 0; i < _npar; i++) { - i1 = i0 + _parsize; - fftb = M->_fftb [k]; - if (fftb && (i0 < n) && (i1 > 0)) - { - memset (_prep_data, 0, 2 * _parsize * sizeof (float)); - j0 = (i0 < 0) ? 0 : i0; - j1 = (i1 > n) ? n : i1; - for (j = j0; j < j1; j++) _prep_data [j - i0] = norm * data [j * step]; - fftwf_execute_dft_r2c (_plan_r2c, _prep_data, fftb); -#ifdef ENABLE_VECTOR_MODE - if (_options & OPT_VECTOR_MODE) fftswap (fftb); -#endif + if (M->_fftb [i]) + { + memset (M->_fftb [i], 0, (_parsize + 1) * sizeof (fftwf_complex)); } - i0 = i1; } } -void Convlevel::impdata_copy (unsigned int inp1, - unsigned int out1, - unsigned int inp2, - unsigned int out2) +void Convlevel::impdata_link (uint32_t inp1, + uint32_t out1, + uint32_t inp2, + uint32_t out2) { Macnode *M1; Macnode *M2; @@ -569,18 +550,17 @@ M1 = findmacnode (inp1, out1, false); if (! 
M1) return; M2 = findmacnode (inp2, out2, true); - if (M2->_fftb) return; - M2->_fftb = M1->_fftb; - M2->_copy = true; + M2->free_fftb (); + M2->_link = M1; } -void Convlevel::reset (unsigned int inpsize, - unsigned int outsize, +void Convlevel::reset (uint32_t inpsize, + uint32_t outsize, float **inpbuff, float **outbuff) { - unsigned int i; + uint32_t i; Inpnode *X; Outnode *Y; @@ -658,7 +638,6 @@ void Convlevel::cleanup (void) { - unsigned int i; Inpnode *X, *X1; Outnode *Y, *Y1; Macnode *M, *M1; @@ -666,8 +645,6 @@ X = _inp_list; while (X) { - for (i = 0; i < _npar; i++) free (X->_ffta [i]); - delete[] X->_ffta; X1 = X->_next; delete X; X = X1; @@ -680,19 +657,10 @@ M = Y->_list; while (M) { - if ((M->_fftb) && !(M->_copy)) - { - for (i = 0; i < _npar; i++) - { - free (M->_fftb [i]); - } - delete[] M->_fftb; - } M1 = M->_next; delete M; M = M1; } - for (i = 0; i < 3; i++) free (Y->_buff [i]); Y1 = Y->_next; delete Y; Y = Y1; @@ -701,9 +669,9 @@ fftwf_destroy_plan (_plan_r2c); fftwf_destroy_plan (_plan_c2r); - free (_time_data); - free (_prep_data); - free (_freq_data); + fftwf_free (_time_data); + fftwf_free (_prep_data); + fftwf_free (_freq_data); _plan_r2c = 0; _plan_c2r = 0; _time_data = 0; @@ -739,9 +707,7 @@ void Convlevel::process (bool skip) { - unsigned int i, j, k; - unsigned int i1, n1, n2, opi1, opi2; - + uint32_t i, i1, j, k, n1, n2, opi1, opi2; Inpnode *X; Macnode *M; Outnode *Y; @@ -796,7 +762,7 @@ for (j = 0; j < _npar; j++) { ffta = X->_ffta [i]; - fftb = M->_fftb [j]; + fftb = M->_link ? 
M->_link->_fftb [j] : M->_fftb [j]; if (fftb) { #ifdef ENABLE_VECTOR_MODE @@ -847,11 +813,11 @@ } -int Convlevel::readout (bool sync, unsigned int skipcnt) +int Convlevel::readout (bool sync, uint32_t skipcnt) { - unsigned int i; - float *p, *q; - Outnode *Y; + uint32_t i; + float *p, *q; + Outnode *Y; _outoffs += _outsize; if (_outoffs == _parsize) @@ -893,58 +859,38 @@ } -Macnode *Convlevel::findmacnode (unsigned int inp, unsigned int out, bool create) +Macnode *Convlevel::findmacnode (uint32_t inp, uint32_t out, bool create) { - unsigned int i; - Inpnode *X; - Outnode *Y; - Macnode *M; + Inpnode *X; + Outnode *Y; + Macnode *M; for (X = _inp_list; X && (X->_inp != inp); X = X->_next); if (! X) { if (! create) return 0; - X = new Inpnode; + X = new Inpnode (inp); X->_next = _inp_list; _inp_list = X; - X->_inp = inp; - X->_ffta = new fftwf_complex * [_npar]; - memset (X->_ffta, 0, _npar * sizeof (fftw_complex *)); - for (i = 0; i < _npar; i++) - { - X->_ffta [i] = (fftwf_complex *)(alloc_aligned ((_parsize + 1) * sizeof (fftwf_complex))); - } + X->alloc_ffta (_npar, _parsize); } for (Y = _out_list; Y && (Y->_out != out); Y = Y->_next); if (! Y) { if (! create) return 0; - Y = new Outnode; + Y = new Outnode (out, _parsize); Y->_next = _out_list; _out_list = Y; - Y->_out = out; - Y->_list = 0; - for (i = 0; i < 3; i++) - { - Y->_buff [i] = 0; - } - for (i = 0; i < 3; i++) - { - Y->_buff [i] = (float *)(alloc_aligned (_parsize * sizeof (float))); - } } for (M = Y->_list; M && (M->_inpn != X); M = M->_next); if (! M) { if (! 
create) return 0; - M = new Macnode; + M = new Macnode (X); M->_next = Y->_list; Y->_list = M; - M->_inpn = X; - M->_fftb = 0; - M->_copy = false; } return M; @@ -955,8 +901,8 @@ void Convlevel::fftswap (fftwf_complex *p) { - unsigned int n = _parsize; - float a, b; + uint32_t n = _parsize; + float a, b; while (n) { @@ -974,3 +920,98 @@ #endif +Inpnode::Inpnode (uint16_t inp): + _next (0), + _ffta (0), + _npar (0), + _inp (inp) +{ +} + + +Inpnode::~Inpnode (void) +{ + free_ffta (); +} + + +void Inpnode::alloc_ffta (uint16_t npar, int32_t size) +{ + _npar = npar; + _ffta = new fftwf_complex * [_npar]; + for (int i = 0; i < _npar; i++) + { + _ffta [i] = calloc_complex (size + 1); + } +} + + +void Inpnode::free_ffta (void) +{ + if (!_ffta) return; + for (uint16_t i = 0; i < _npar; i++) + { + fftwf_free ( _ffta [i]); + } + delete[] _ffta; + _ffta = 0; + _npar = 0; +} + + +Macnode::Macnode (Inpnode *inpn): + _next (0), + _inpn (inpn), + _link (0), + _fftb (0), + _npar (0) +{} + + +Macnode::~Macnode (void) +{ + free_fftb (); +} + + +void Macnode::alloc_fftb (uint16_t npar) +{ + _npar = npar; + _fftb = new fftwf_complex * [_npar]; + for (uint16_t i = 0; i < _npar; i++) + { + _fftb [i] = 0; + } +} + + +void Macnode::free_fftb (void) +{ + if (!_fftb) return; + for (uint16_t i = 0; i < _npar; i++) + { + fftwf_free ( _fftb [i]); + } + delete[] _fftb; + _fftb = 0; + _npar = 0; +} + + +Outnode::Outnode (uint16_t out, int32_t size): + _next (0), + _list (0), + _out (out) +{ + _buff [0] = calloc_real (size); + _buff [1] = calloc_real (size); + _buff [2] = calloc_real (size); +} + + +Outnode::~Outnode (void) +{ + fftwf_free (_buff [0]); + fftwf_free (_buff [1]); + fftwf_free (_buff [2]); +} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/zita-convolver-3.1.0/libs/zita-convolver.h new/zita-convolver-4.0.0/libs/zita-convolver.h --- old/zita-convolver-3.1.0/libs/zita-convolver.h 2011-12-02 22:22:15.000000000 +0100 +++ 
new/zita-convolver-4.0.0/libs/zita-convolver.h 2018-06-01 10:22:18.000000000 +0200 @@ -1,6 +1,6 @@ // ---------------------------------------------------------------------------- // -// Copyright (C) 2006-2011 Fons Adriaensen <[email protected]> +// Copyright (C) 2006-2018 Fons Adriaensen <[email protected]> // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -23,11 +23,12 @@ #include <pthread.h> +#include <stdint.h> #include <fftw3.h> -#define ZITA_CONVOLVER_MAJOR_VERSION 3 -#define ZITA_CONVOLVER_MINOR_VERSION 1 +#define ZITA_CONVOLVER_MAJOR_VERSION 4 +#define ZITA_CONVOLVER_MINOR_VERSION 0 extern int zita_convolver_major_version (void); @@ -42,11 +43,7 @@ #endif -#if defined(__linux__) || defined(__FreeBSD_kernel__) || defined(__GNU__) - -// NOTE: __FreeBSD_kernel__ and __GNU__ were added by the Debian maintainers -// (the latter for the HURD version of Debian). Things are reported to work -// with some applications but probably have not been tested in depth. 
+#if defined(__linux__) || defined(__GNU__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) #include <semaphore.h> @@ -163,9 +160,15 @@ friend class Convlevel; + Inpnode (uint16_t inp); + ~Inpnode (void); + void alloc_ffta (uint16_t npar, int32_t size); + void free_ffta (void); + Inpnode *_next; fftwf_complex **_ffta; - unsigned int _inp; + uint16_t _npar; + uint16_t _inp; }; @@ -175,10 +178,16 @@ friend class Convlevel; + Macnode (Inpnode *inpn); + ~Macnode (void); + void alloc_fftb (uint16_t npar); + void free_fftb (void); + Macnode *_next; Inpnode *_inpn; + Macnode *_link; fftwf_complex **_fftb; - bool _copy; + uint16_t _npar; }; @@ -188,10 +197,13 @@ friend class Convlevel; + Outnode (uint16_t out, int32_t size); + ~Outnode (void); + Outnode *_next; Macnode *_list; float *_buff [3]; - unsigned int _out; + uint16_t _out; }; @@ -206,13 +218,10 @@ MEM_ALLOC = -3 }; -private: - - friend class Convlevel; - friend class Convproc; - Converror (int error) : _error (error) {} +private: + int _error; }; @@ -226,7 +235,8 @@ enum { OPT_FFTW_MEASURE = 1, - OPT_VECTOR_MODE = 2 + OPT_VECTOR_MODE = 2, + OPT_LATE_CONTIN = 4 }; enum @@ -239,43 +249,38 @@ Convlevel (void); ~Convlevel (void); - void *alloc_aligned (size_t size); - - void configure (int prio, - unsigned int offs, - unsigned int npar, - unsigned int parsize, - unsigned int options); - - void impdata_create (unsigned int inp, - unsigned int out, - unsigned int step, - float *data, - int ind0, - int ind1); - - void impdata_update (unsigned int inp, - unsigned int out, - unsigned int step, - float *data, - int ind0, - int ind1); - - void impdata_copy (unsigned int inp1, - unsigned int out1, - unsigned int inp2, - unsigned int out2); - - void reset (unsigned int inpsize, - unsigned int outsize, - float **inpbuff, - float **outbuff); + void configure (int prio, + uint32_t offs, + uint32_t npar, + uint32_t parsize, + uint32_t options); + + void impdata_write (uint32_t inp, + uint32_t out, + int32_t step, + float 
*data, + int32_t ind0, + int32_t ind1, + bool create); + + void impdata_clear (uint32_t inp, + uint32_t out); + + void impdata_link (uint32_t inp1, + uint32_t out1, + uint32_t inp2, + uint32_t out2); + + void reset (uint32_t inpsize, + uint32_t outsize, + float **inpbuff, + float **outbuff); void start (int absprio, int policy); void process (bool sync); - int readout (bool sync, unsigned int skipcnt); + int readout (bool sync, uint32_t skipcnt); void stop (void); @@ -289,34 +294,35 @@ void main (void); - Macnode *findmacnode (unsigned int inp, unsigned int out, bool create); + Macnode *findmacnode (uint32_t inp, uint32_t out, bool create); - volatile unsigned int _stat; // current processing state - int _prio; // relative priority - unsigned int _offs; // offset from start of impulse response - unsigned int _npar; // number of partitions - unsigned int _parsize; // partition and outbut buffer size - unsigned int _outsize; // step size for output buffer - unsigned int _outoffs; // offset into output buffer - unsigned int _inpsize; // size of shared input buffer - unsigned int _inpoffs; // offset into input buffer - unsigned int _options; // various options - unsigned int _ptind; // rotating partition index - unsigned int _opind; // rotating output buffer index - int _bits; // bit identifiying this level - int _wait; // number of unfinished cycles - pthread_t _pthr; // posix thread executing this level - ZCsema _trig; // sema used to trigger a cycle - ZCsema _done; // sema used to wait for a cycle - Inpnode *_inp_list; // linked list of active inputs - Outnode *_out_list; // linked list of active outputs - fftwf_plan _plan_r2c; // FFTW plan, forward FFT - fftwf_plan _plan_c2r; // FFTW plan, inverse FFT - float *_time_data; // workspace - float *_prep_data; // workspace - fftwf_complex *_freq_data; // workspace - float **_inpbuff; // array of shared input buffers - float **_outbuff; // array of shared output buffers + + volatile uint32_t _stat; // current processing 
state + int _prio; // relative priority + uint32_t _offs; // offset from start of impulse response + uint32_t _npar; // number of partitions + uint32_t _parsize; // partition and outbut buffer size + uint32_t _outsize; // step size for output buffer + uint32_t _outoffs; // offset into output buffer + uint32_t _inpsize; // size of shared input buffer + uint32_t _inpoffs; // offset into input buffer + uint32_t _options; // various options + uint32_t _ptind; // rotating partition index + uint32_t _opind; // rotating output buffer index + int _bits; // bit identifiying this level + int _wait; // number of unfinished cycles + pthread_t _pthr; // posix thread executing this level + ZCsema _trig; // sema used to trigger a cycle + ZCsema _done; // sema used to wait for a cycle + Inpnode *_inp_list; // linked list of active inputs + Outnode *_out_list; // linked list of active outputs + fftwf_plan _plan_r2c; // FFTW plan, forward FFT + fftwf_plan _plan_c2r; // FFTW plan, inverse FFT + float *_time_data; // workspace + float *_prep_data; // workspace + fftwf_complex *_freq_data; // workspace + float **_inpbuff; // array of shared input buffers + float **_outbuff; // array of shared output buffers }; @@ -347,7 +353,8 @@ enum { OPT_FFTW_MEASURE = Convlevel::OPT_FFTW_MEASURE, - OPT_VECTOR_MODE = Convlevel::OPT_VECTOR_MODE + OPT_VECTOR_MODE = Convlevel::OPT_VECTOR_MODE, + OPT_LATE_CONTIN = Convlevel::OPT_LATE_CONTIN }; enum @@ -362,90 +369,100 @@ MAXQUANT = 8192 }; - unsigned int state (void) const + uint32_t state (void) const { return _state; } - float *inpdata (unsigned int inp) const + float *inpdata (uint32_t inp) const { return _inpbuff [inp] + _inpoffs; } - float *outdata (unsigned int out) const + float *outdata (uint32_t out) const { return _outbuff [out] + _outoffs; } - void set_density (float density); - - void set_options (unsigned int options); - - void set_skipcnt (unsigned int skipcnt); - - int configure (unsigned int ninp, - unsigned int nout, - unsigned int 
maxsize, - unsigned int quantum, - unsigned int minpart, - unsigned int maxpart); - - int impdata_create (unsigned int inp, - unsigned int out, - unsigned int step, - float *data, - int ind0, - int ind1); - - int impdata_update (unsigned int inp, - unsigned int out, - unsigned int step, - float *data, - int ind0, - int ind1); + int configure (uint32_t ninp, + uint32_t nout, + uint32_t maxsize, + uint32_t quantum, + uint32_t minpart, + uint32_t maxpart, + float density); + + int impdata_create (uint32_t inp, + uint32_t out, + int32_t step, + float *data, + int32_t ind0, + int32_t ind1); + + int impdata_clear (uint32_t inp, + uint32_t out); + + int impdata_update (uint32_t inp, + uint32_t out, + int32_t step, + float *data, + int32_t ind0, + int32_t ind1); + + int impdata_link (uint32_t inp1, + uint32_t out1, + uint32_t inp2, + uint32_t out2); + + // Deprecated, use impdata_link() instead. + int impdata_copy (uint32_t inp1, + uint32_t out1, + uint32_t inp2, + uint32_t out2) + { + return impdata_link (inp1, out1, inp2, out2); + } + + void set_options (uint32_t options); - int impdata_copy (unsigned int inp1, - unsigned int out1, - unsigned int inp2, - unsigned int out2); + void set_skipcnt (uint32_t skipcnt); - int reset (void); + int reset (void); - int start_process (int abspri, int policy); + int start_process (int abspri, int policy); - int process (bool sync = false); + int process (bool sync = false); - int stop_process (void); + int stop_process (void); bool check_stop (void); - int cleanup (void); + int cleanup (void); void print (FILE *F = stdout); - static float _mac_cost; - static float _fft_cost; - private: - unsigned int _state; // current state - float *_inpbuff [MAXINP]; // input buffers - float *_outbuff [MAXOUT]; // output buffers - unsigned int _inpoffs; // current offset in input buffers - unsigned int _outoffs; // current offset in output buffers - unsigned int _options; // option bits - unsigned int _skipcnt; // number of frames to skip - float 
_density; // matrix density hint - unsigned int _ninp; // number of inputs - unsigned int _nout; // number of outputs - unsigned int _quantum; // processing block size - unsigned int _minpart; // smallest partition size - unsigned int _maxpart; // largest allowed partition size - unsigned int _nlevels; // number of partition sizes - unsigned int _inpsize; // size of input buffers - unsigned int _latecnt; // count of cycles ending too late - Convlevel *_convlev [MAXLEV]; // array of processors - void *_dummy [64]; + uint32_t _state; // current state + float *_inpbuff [MAXINP]; // input buffers + float *_outbuff [MAXOUT]; // output buffers + uint32_t _inpoffs; // current offset in input buffers + uint32_t _outoffs; // current offset in output buffers + uint32_t _options; // option bits + uint32_t _skipcnt; // number of frames to skip + uint32_t _ninp; // number of inputs + uint32_t _nout; // number of outputs + uint32_t _quantum; // processing block size + uint32_t _minpart; // smallest partition size + uint32_t _maxpart; // largest allowed partition size + uint32_t _nlevels; // number of partition sizes + uint32_t _inpsize; // size of input buffers + uint32_t _latecnt; // count of cycles ending too late + Convlevel *_convlev [MAXLEV]; // array of processors + void *_dummy [64]; + + static float _mac_cost; + static float _fft_cost; };
