[Faudiostream-users] tabulateNd().cub vs 2nd order Taylor approximation

Oleg Nesterov Tue, 09 May 2023 09:15:03 -0700

See the attached test-case. If you want to compile it, you need
fpp (https://github.com/oleg-nesterov/fpp), maxima, and Linux.


Compiled with '-a plot.cpp'

        $ ./test-plot -n 100000 | tail -n 1
        0.00285841338           0.000623551081

As you can see, taylor() uses less memory and it is more accurate
at least in this particular case. And it is more than 3 times faster
according to '-a bench.cpp'.

Once again, I am not trying to criticise your (nice!) implementation,
I just want to share the experimental results.

And of course, the implementation of taylor() is just POC, it is
suboptimal, only works for 2d case, etc. Not to mention that it is not
in pure Faust language ;)

Oleg.

import("stdfaust.lib");

fxy(x,y) = sin(x)*cos(y);

rx0 = 0;
rx1 = 2*ma.PI;
ry0 = 0;
ry1 = 2*ma.PI;

// table size = 128*128 = 16384
tabcub(x,y) = tabulateNd(1, fxy, (128,128, rx0,ry0, rx1,ry1, x,y)).cub;

// table size = 32*32*6 = 6144
taylor = tl22(${sin(x)*cos(y)}, 32,32, rx0,ry0, rx1,ry1);

N = 100000;

maxerr = abs : max ~ _;

process = (ba.time%N)/N * 2*ma.PI
        <: fxy - tabcub, fxy - taylor
         : par(i,2, maxerr);

// ----------------------------------------------------------------------------
// taylor approximation, 2 vars, 2nd order

mk_tl22tab = FPP(fxy, szx,szy, rx0,ry0, rx1,ry1) pure eval {
PERL:   sub mk_tltab
        {
                my ($FXY, $SZX,$SZY, $RX0,$RY0, $RX1,$RY1) = @_;
                s|[fL]$|| for $RX0,$RY0, $RX1,$RY1;

                my $mac = qq{block(
                FXY(x,y) := $FXY,
                [SZX,SZY, RX0,RY0, RX1,RY1] : [$SZX,$SZY, $RX0,$RY0, $RX1,$RY1],

                define(Fx(x,y),  diff(FXY(x,y), x)),
                define(Fy(x,y),  diff(FXY(x,y), y)),
                define(Fxy(x,y), diff(FXY(x,y), x,1,y,1)),
                define(Fxx(x,y), diff(FXY(x,y), x,2) / 2),
                define(Fyy(x,y), diff(FXY(x,y), y,2) / 2),

                dx: (RX1-RX0)/SZX,
                dy: (RY1-RY0)/SZY,
                for nx: 0 thru SZX-1 do (
                        xc: RX0 + dx * (nx + 1/2),
                        printf(true, "{~%"),
                for ny: 0 thru SZY-1 do (
                        yc: RY0 + dy * (ny + 1/2),
                        printf(true, "~t{ "),

                        printf(true, "~g, ", FXY(xc,yc)),
                        printf(true, "~g,~g, ", Fx(xc,yc),Fy(xc,yc)),
                        printf(true, "~g,~g,~g, ", Fxy(xc,yc), 
Fxx(xc,yc),Fyy(xc,yc)),

                        printf(true, " },~%")),
                        printf(true, " },~%")
                ))\$};

                qx{maxima --very-quiet <<<'$mac'};
        }

file:
        #define FLOAT $!(${pp_cpp::O_FLOAT})
        static FLOAT $taylor(FLOAT x, FLOAT y)
        {
                static const FLOAT taylor[$szx][$szy][6] = {
                        $!mk_tltab($fxy, $szx,$szy, $rx0,$ry0, $rx1,$ry1)
                };

                int nx = (x - $rx0) * $(szx/(rx1-rx0));
                int ny = (y - $ry0) * $(szy/(ry1-ry0));
                const FLOAT *const tl = taylor[nx][ny];

                FLOAT dx = x - ($rx0 + $((rx1-rx0)/szx) * (nx + .5));
                FLOAT dy = y - ($ry0 + $((ry1-ry0)/szy) * (ny + .5));

                return tl[0] + tl[1]*dx + tl[2]*dy + tl[3]*dx*dy + tl[4]*dx*dx 
+ tl[5]*dy*dy;
        }
        #undef FLOAT

exec:   $taylor
};

tl22 = FPP(taylor, x,y) pure { ($taylor($x,$y)) } (mk_tl22tab);

// ----------------------------------------------------------------------------
// copied from basics.lib, last commit aa5660b

tabulateNd(C,function,parameters) =
    environment {
        val =
            parameters
            // our sizes can be int, should be faster to calculate
            : (par(i, N, int),si.bus(N*3))
              // table size, waveform, read index
              <: (tableSize,wf,readIndex(idsGetRoundedNotFloored))
            : rdtable
        with {
        // for val we want the rounded version of id, the interpolators need 
the rounded down version
        // see: https://github.com/grame-cncm/faustlibraries/pull/152
        idsGetRoundedNotFloored =
            // table sizes and ids for each dimension
            sizesIds:
            (bs,par(i, N, _+0.5));
        };

        lin =
            parameters
            // our sizes can be int, should be faster to calculate
            : (par(i, N, int),si.bus(N*3))
              // the mixers need the float indexes and the values read from the 
tables
              <: (ids,tables(nrReadIndexes,readIndexes))
                 // the actual interpolation
            :mixers(0,nrReadIndexes)
        with {
            // the read indexes to form the closest points in the grid 
surrounding the point of interest.
            readIndexes =
                // for cleaner block diagrams
                si.bus(nParams) <:
                // the closest stored value at or below the point of interest
                ((readIndex(sizesIds) <:si.bus(nrReadIndexes))
                 // the offsets to get to the points around it
                , offsets)
                // add them up
                : ro.interleave(nrReadIndexes,2) : par(i, nrReadIndexes, +) ;
            offsets =
                // the size of a step for each dimension
                stepSizes
                // for each read index
                <: par(i, nrReadIndexes,
                       // sum up the N components that determine the final 
offset
                       par(j, N, switch(i,j)):>_)
            with {
                // the truth table of which step size to use is a binary 
counting table
                // so we use a variant on a function I wrote for slidingReduce
                switch(i,j) = _*int2bin(j,i,nrReadIndexes);
            };
            // since each interpolator has 2 inputs
            nrReadIndexes = pow(2,N);
        };

        cub =
            // same as .lin so far
            parameters
            : (par(i, N, int),si.bus(N*3))
              <: (ids,tables(nrReadIndexes,readIndexes))
            :mixers(1,nrReadIndexes)
        with {
            readIndexes =
                si.bus(nParams) <:
                ((stepSizes:ro.cross(N))
                , readIndex(sizesIds))
                // calculate the offsets for each interpolation read index and 
add them to the "base" readindex
                :cubifiers;
            // since each interpolator has 4 inputs
            nrReadIndexes = pow(4,N);
            // calculate the offsets and add them to the "base" readindex
            // we do this by creating a tree, where each branch has 4 
sub-branches
            // there are N splits, so we end up with nrReadIndexes "leaves"
            cubifiers =
                // iterate trough the dimensions, each feeding into the next
                seq(i, N,
                    si.bus(N-i-1),cubifier(pow(4,i)));
            // take a step size and ``len`` input indexes and create 4*len 
output indexes
            // offset the input index(es) with the amount needed for that 
dimension
            cubifier(len) =
                ((_<:si.bus(len*4))
                , (si.bus(len)<:si.bus(len*4)))
                : ro.interleave(len*4,2)
                :par(i, 4,
                     par(j, len, off(i)+_))
            with {
                // the hardcoded numbers are the same offsets as in 
``ba.tabulate``
                // but they get multiplied by the step size of the current 
dimension
                off(0,stepSize) = -1*stepSize;
                off(1,stepSize) = 0;
                off(2,stepSize) = 1*stepSize;
                off(3,stepSize) = 2*stepSize;
            };
        };

        // how many parameters have we been given?
        nParams = outputs(parameters);
        // the number of dimensions
        N = int(nParams/4);
        // the values we read from the tables
        tables(nrReadIndexes,readIndexes) =
            si.bus(nParams)<:
            // table size, waveform, read index for each table
            ((tableSize<:si.bus(nrReadIndexes))
            , (wf<:si.bus(nrReadIndexes))
            , readIndexes)
            :ro.interleave(nrReadIndexes,3)
             // the actual tables
            :par(i, nrReadIndexes, rdtable);

        // the interpolators
        mixers(linCub,nrReadIndexes)=
            (ro.cross(N),si.bus(nrReadIndexes))
            : seq(i, N, mixer(linCub,i));

        // the interpolator for a single dimension
        // linear version
        mixer(0,i) =
            mixerUniversal(i,2,(_,!,_),it.interpolate_linear) ;
        // cubic version
        mixer(1,i) =
            mixerUniversal(i,4,(_,!,_,!,_,!,_),it.interpolate_cubic) ;

        // i is the current dimension
        // mult is the number of inputs of each interpolator
        // sieve tells us which outputs to let trough and which to block, more 
on this soon
        mixerUniversal(i,mult,sieve,it) =
            // bypass the weights needed for the next dimension
            si.bus(N-i-1),
            // split our own weight
            // in the end we need one weight per interpolator
            // but since we need to interleave these with nrInterpolators*mult 
read values, we split it into as many busses as we have read values
            (((_<:si.bus(nrInterpolators(i)*mult))
              // the read values bypass this step
             , (si.bus(nrInterpolators(i)*mult)))
             // interleave the weights with the read values
             : ro.interleave(nrInterpolators(i)*mult,2)
               // the actual interpolators
             : par(i, nrInterpolators(i),
                   // take the id and turn it into the weight for this dimension
                   ((_<:(_-int(_)))
                    // throw away the extra weights we created for the 
interleave
                   ,sieve)
                   // a single interpolator
                   :it))
        with {
            // the number of interpolators for this dimension
            nrInterpolators(i) = pow(mult,N-i-1);
        };

        // total size of the table: s(0) * s(1)  ...  * s(N-2) * s(N-1)
        // N in, 1 out
        size(1) = _;
        size(N) = _*size(N-1);
        tableSize = size(N),par(i, N*3, !);
        // the size of a step for each dimension.
        // the first is 1, the second is the size of the first dimension times 
one
        // the third is the second number, times the size of the second 
dimension
        stepSizes =
            (int(1),si.bus(N-1),par(i, 3*N+1, !))
            : seq(i, N-1,
                  ((si.bus(i),(_<:(_,_)), si.bus(N-i-1))
                   :(si.bus(i+1),*,si.bus(N-i-2))));
        // Prepare the 'float' table read index for one parameter
        id(sizeX,r0,r1,x) = (x-r0)/(r1-r0)*(sizeX-1);
        // Prepare the 'float' table read index for all parameters
        ids =
            ro.interleave(N,4)
            : par(i, N, id) ;

        // one waveform parameter write value:
        wfp(prevSize,sizeX,r0,r1) =
            r0+
            ((float(int(ba.time%(prevSize*sizeX)/prevSize))*(r1-r0))
            /float(sizeX-1))
           ,(prevSize*sizeX);

        // all waveform parameters write values:
        wfps =
            ro.interleave(N,3)
            : (1,si.bus(3*N))
            : seq(i, N, si.bus(i),wfp, si.bus(3*N-(3*(i+1))))
            : (si.bus(N),!);

        // Create the table
        wf = (wfps,par(i, N, !)):function;

        // Limit the table read index in [0, mid] if C = 1
        // we do this both for the total table and per dimension
        rid(x,mid, 0) = int(x);
        rid(x,mid, 1) = max(int(0), min(int(x), mid));

        // for ``.val`` this is the stored value closest to the point of 
interest
        // for  ``.lin`` and ``cub`` it's the closest stored value at or below 
the point of interest
        readIndex(sizesIds) =
            // table sizes and ids for each dimension
            sizesIds
            // get the raw read index
            : ri
              // limit it and make it an int
            : riPost ;
        // helper function to route the arguments of rid
        riPost(size,ri) =
            rid(ri,size-int(1),C);
        // the raw read index
        ri =
            // interleave the sizes and the ids
            ro.interleave(N,2)
            // the first iteration gets:
            // 1 as the size of the previous dimension and 0 as the read index 
of the previous dimension
            : (1,0
                 // pass trough the sizes and ids
               ,si.bus(2*N))
              // for each dimension
            : seq(i, N,
                  // get the step size and the partial index for the next 
dimension
                  riN
                  // pass trough the sizes and ids for the next dimensions
                  , si.bus(2*(N-i-1))) ;

        // get the step size and the partial index for the next dimension
        riN(prevSize,prevIX,sizeX,idX) =
            // each step size is the previous step size times the current
            (prevSize*sizeX)
            // but the step we actually take in this dimension is the stepsize 
we calculated in the previous dimension
          , ( (prevSize*
               // round down and limit the id for this dimension to get the 
number of steps
               rid(int(idX),(sizeX-int(1)),C))
              // add the result of that to the one we calculated in the 
previous dimension
              +prevIX) ;

        // table sizes and ids for each dimension
        // just a helper function to simplify routing
        sizesIds =
            (
                // from sizes
                ( bs<:si.bus(N*2) )
                // from r0s,r1s,xs
              , si.bus(N*3)
            ) :
            // from sizes
            (si.bus(N)
             // takes (midX,r0,r1,x)
            ,ids);

        // the value of a single bit when binary counting
        int2bin(i,n,maxN) = int(int((n)/(1<<i))%int(2));
        // shortcut
        bs = si.bus(N);
};

_______________________________________________
Faudiostream-users mailing list
Faudiostream-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/faudiostream-users

[Faudiostream-users] tabulateNd().cub vs 2nd order Taylor approximation

Reply via email to