Update of /cvsroot/audacity/lib-src/libnyquist/nyquist/ffts/src
In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv8506/ffts/src

Added Files:
        dxpose.c dxpose.h fft2d.c fft2d.h fftext.c fftext.h fftlib.c 
        fftlib.h files files~ matlib.c matlib.h 
Log Message:
Updating to Nyquist v3.03.


--- NEW FILE: files ---
tr "\r" "\n" < dxpose.c > tmp.c
mv tmp.c dxpose.c
tr "\r" "\n" < dxpose.h > tmp.c
mv tmp.c dxpose.h
tr "\r" "\n" < fft2d.c > tmp.c
mv tmp.c fft2d.c
tr "\r" "\n" < fft2d.h > tmp.c
mv tmp.c fft2d.h
tr "\r" "\n" < fftext.c > tmp.c
mv tmp.c fftext.c
tr "\r" "\n" < fftext.h > tmp.c
mv tmp.c fftext.h
tr "\r" "\n" < fftlib.c > tmp.c
mv tmp.c fftlib.c
tr "\r" "\n" < fftlib.h > tmp.c
mv tmp.c fftlib.h
tr "\r" "\n" < files > tmp.c
mv tmp.c n" < files
tr "\r" "\n" < matlib.c > tmp.c
mv tmp.c matlib.c
tr "\r" "\n" < matlib.h > tmp.c
mv tmp.c matlib.h

--- NEW FILE: fftlib.c ---
/*******************************************************************
lower level fft stuff including routines called in fftext.c and fft2d.c
*******************************************************************/
#include "fftlib.h"
#include <math.h>
#define MCACHE  (11-(sizeof(float)/8))  // fft's with M bigger than this bust 
primary cache
#ifdef WIN32
#define inline __inline
#endif

// some math constants to 40 decimal places
#define MYPI            3.141592653589793238462643383279502884197       // pi
#define MYROOT2         1.414213562373095048801688724209698078569       // 
sqrt(2)
#define MYCOSPID8       0.9238795325112867561281831893967882868224      // 
cos(pi/8)
#define MYSINPID8       0.3826834323650897717284599840303988667613      // 
sin(pi/8)


/*************************************************
routines to initialize tables used by fft routines
[...3135 lines suppressed...]
                }

                if ( (M-1-(StageCnt * 3)) == 2 ){
                        ibfR4(ioptr, M, NDiffU);                        /* 1 
radix 4 stage */
                        NDiffU *= 4;
                }

                if (M <= MCACHE){
                        ibfstages(ioptr, M, Utbl, 2, NDiffU, StageCnt);         
/*  RADIX 8 Stages      */
                }

                else{
                        ifftrecurs(ioptr, M, Utbl, 2, NDiffU, StageCnt);        
        /*  RADIX 8 Stages      */
                }

                ioptr += 2*POW2(M);
        }
}
}


--- NEW FILE: fftext.c ---
/*******************************************************************
        This file extends the fftlib with calls to maintain the cosine and bit 
reversed tables
        for you (including mallocs and free's).  Call the init routine for each 
fft size you will
        be using.  Then you can call the fft routines below which will make the 
fftlib library 
        call with the appropriate tables passed.  When you are done with all 
fft's you can call 
        fftfree to release the storage for the tables.  Note that you can call 
fftinit repeatedly
        with the same size, the extra calls will be ignored. So, you could make 
a macro to call
        fftInit every time you call ffts. For example you could have someting 
like:
        #define FFT(a,n) if(!fftInit(roundtol(LOG2(n)))) 
ffts(a,roundtol(LOG2(n)),1);else printf("fft error\n");
*******************************************************************/
#include <stdlib.h>
#include "fftlib.h"
#include "matlib.h"
#include "fftext.h"

// pointers to storage of Utbl's and  BRLow's
static float *UtblArray[8*sizeof(long)] = {0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
                                                                        
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0};
static short *BRLowArray[8*sizeof(long)/2]  = {0,0,0,0,0,0,0,0, 
0,0,0,0,0,0,0,0};

int fftInit(long M){
// malloc and init cosine and bit reversed tables for a given size fft, ifft, 
rfft, rifft
/* INPUTS */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* OUTPUTS */
/* private cosine and bit reversed tables       */

int theError = 1;
/*** I did NOT test cases with M>27 ***/
if ((M >= 0) && (M < 8*sizeof(long))){
        theError = 0;
        if (UtblArray[M] == 0){ // have we not inited this size fft yet?
                // init cos table
                UtblArray[M] = (float *) malloc( (POW2(M)/4+1)*sizeof(float) );
                if (UtblArray[M] == 0)
                        theError = 2;
                else{
                        fftCosInit(M, UtblArray[M]);
                }
                if (M > 1){
                        if (BRLowArray[M/2] == 0){      // init bit reversed 
table for cmplx fft
                                BRLowArray[M/2] = (short *) malloc( 
POW2(M/2-1)*sizeof(short) );
                                if (BRLowArray[M/2] == 0)
                                        theError = 2;
                                else{
                                        fftBRInit(M, BRLowArray[M/2]);
                                }
                        }
                }
                if (M > 2){
                        if (BRLowArray[(M-1)/2] == 0){  // init bit reversed 
table for real fft
                                BRLowArray[(M-1)/2] = (short *) malloc( 
POW2((M-1)/2-1)*sizeof(short) );
                                if (BRLowArray[(M-1)/2] == 0)
                                        theError = 2;
                                else{
                                        fftBRInit(M-1, BRLowArray[(M-1)/2]);
                                }
                        }
                }
        }
};
return theError;
}

void fftFree(){
// release storage for all private cosine and bit reversed tables
long i1;
for (i1=8*sizeof(long)/2-1; i1>=0; i1--){
        if (BRLowArray[i1] != 0){
                free(BRLowArray[i1]);
                BRLowArray[i1] = 0;
        };
};
for (i1=8*sizeof(long)-1; i1>=0; i1--){
        if (UtblArray[i1] != 0){
                free(UtblArray[i1]);
                UtblArray[i1] = 0;
        };
};
}

/*************************************************
 The following calls are easier than calling to fftlib directly.
 Just make sure fftlib has been called for each M first.
**************************************************/

void ffts(float *data, long M, long Rows){
/* Compute in-place complex fft on the rows of the input array  */
/* INPUTS */
/* *ioptr = input data array    */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)       
*/
/* OUTPUTS */
/* *ioptr = output data array   */
        ffts1(data, M, Rows, UtblArray[M], BRLowArray[M/2]);
}

void iffts(float *data, long M, long Rows){
/* Compute in-place inverse complex fft on the rows of the input array  */
/* INPUTS */
/* *ioptr = input data array    */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)       
*/
/* OUTPUTS */
/* *ioptr = output data array   */
        iffts1(data, M, Rows, UtblArray[M], BRLowArray[M/2]);
}

void rffts(float *data, long M, long Rows){
/* Compute in-place real fft on the rows of the input array     */
/* The result is the complex spectra of the positive frequencies */
/* except the location for the first complex number contains the real */
/* values for DC and Nyquest */
/* See rspectprod for multiplying two of these spectra together- ex. for fast 
convolution */
/* INPUTS */
/* *ioptr = real input data array       */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)       
*/
/* OUTPUTS */
/* *ioptr = output data array   in the following order */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... 
Re(x[N/2-1]), Im(x[N/2-1]). */
        rffts1(data, M, Rows, UtblArray[M], BRLowArray[(M-1)/2]);
}

void riffts(float *data, long M, long Rows){
/* Compute in-place real ifft on the rows of the input array    */
/* data order as from rffts */
/* INPUTS */
/* *ioptr = input data array in the following order     */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... 
Re(x[N/2-1]), Im(x[N/2-1]). */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)       
*/
/* OUTPUTS */
/* *ioptr = real output data array      */
        riffts1(data, M, Rows, UtblArray[M], BRLowArray[(M-1)/2]);
}

void rspectprod(float *data1, float *data2, float *outdata, long N){
// When multiplying a pair of spectra from rfft care must be taken to multiply 
the
// two real values seperately from the complex ones. This routine does it 
correctly.
// the result can be stored in-place over one of the inputs
/* INPUTS */
/* *data1 = input data array    first spectra */
/* *data2 = input data array    second spectra */
/* N = fft input size for both data1 and data2 */
/* OUTPUTS */
/* *outdata = output data array spectra */
if(N>1){
        outdata[0] = data1[0] * data2[0];       // multiply the zero freq values
        outdata[1] = data1[1] * data2[1];       // multiply the nyquest freq 
values
        cvprod(data1 + 2, data2 + 2, outdata + 2, N/2-1);       // multiply the 
other positive freq values
}
else{
        outdata[0] = data1[0] * data2[0];
}
}

--- NEW FILE: dxpose.h ---
/*********************
This matrix transpose is in a seperate file because it should always be double 
precision.
*********************/
typedef double_t xdouble;       // I use double_t so that global search and 
replace on double won't 
                                                // change this to float 
accidentally.

void dxpose(xdouble *indata, long iRsiz, xdouble *outdata, long oRsiz, long 
Nrows, long Ncols);
/* not in-place double precision matrix transpose       */
/* INPUTS */
/* *indata = input data array   */
/* iRsiz = offset to between rows of input data array   */
/* oRsiz = offset to between rows of output data array  */
/* Nrows = number of rows in input data array   */
/* Ncols = number of columns in input data array        */
/* OUTPUTS */
/* *outdata = output data array */

--- NEW FILE: fftlib.h ---
#define MYRECIPLN2      1.442695040888963407359924681001892137426       // 
1.0/log(2)

/* some useful conversions between a number and its power of 2 */
#define LOG2(a) (MYRECIPLN2*log(a))     // floating point logarithm base 2
#define POW2(m) ((unsigned long) 1 << (m))      // integer power of 2 for m<32

/*******************************************************************
lower level fft stuff called by routines in fftext.c and fft2d.c
*******************************************************************/

void fftCosInit(long M, float *Utbl);
/* Compute Utbl, the cosine table for ffts      */
/* of size (pow(2,M)/4 +1)      */
/* INPUTS */
/* M = log2 of fft size */
/* OUTPUTS */
/* *Utbl = cosine table */

void fftBRInit(long M, short *BRLow);
/* Compute BRLow, the bit reversed table for ffts       */
/* of size pow(2,M/2 -1)        */
/* INPUTS */
/* M = log2 of fft size */
/* OUTPUTS */
/* *BRLow = bit reversed counter table  */

void ffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
/* Compute in-place complex fft on the rows of the input array  */
/* INPUTS */
/* *ioptr = input data array    */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 
dimensional array)       */
/* *Utbl = cosine table */
/* *BRLow = bit reversed counter table  */
/* OUTPUTS */
/* *ioptr = output data array   */

void iffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
/* Compute in-place inverse complex fft on the rows of the input array  */
/* INPUTS */
/* *ioptr = input data array    */
/* M = log2 of fft size */
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 
dimensional array)       */
/* *Utbl = cosine table */
/* *BRLow = bit reversed counter table  */
/* OUTPUTS */
/* *ioptr = output data array   */

void rffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
/* Compute in-place real fft on the rows of the input array     */
/* The result is the complex spectra of the positive frequencies */
/* except the location for the first complex number contains the real */
/* values for DC and Nyquest */
/* INPUTS */
/* *ioptr = real input data array       */
/* M = log2 of fft size */
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 
dimensional array)       */
/* *Utbl = cosine table */
/* *BRLow = bit reversed counter table  */
/* OUTPUTS */
/* *ioptr = output data array   in the following order */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... 
Re(x[N/2-1]), Im(x[N/2-1]). */


void riffts1(float *ioptr, long M, long Rows, float *Utbl, short *BRLow);
/* Compute in-place real ifft on the rows of the input array    */
/* data order as from rffts1 */
/* INPUTS */
/* *ioptr = input data array in the following order     */
/* M = log2 of fft size */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... 
Re(x[N/2-1]), Im(x[N/2-1]). */
/* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 
dimensional array)       */
/* *Utbl = cosine table */
/* *BRLow = bit reversed counter table  */
/* OUTPUTS */
/* *ioptr = real output data array      */

--- NEW FILE: dxpose.c ---
/*********************
This matrix transpose is in a seperate file because it should always be double 
precision.
*********************/
#include "dxpose.h"
void dxpose(xdouble *indata, long iRsiz, xdouble *outdata, long oRsiz, long 
Nrows, long Ncols){
/* not in-place double precision matrix transpose       */
/* INPUTS */
/* *indata = input data array   */
/* iRsiz = offset to between rows of input data array   */
/* oRsiz = offset to between rows of output data array  */
/* Nrows = number of rows in input data array   */
/* Ncols = number of columns in input data array        */
/* OUTPUTS */
/* *outdata = output data array */

xdouble *irow;          /* pointer to input row start */
xdouble *ocol;          /* pointer to output col start */
xdouble *idata;         /* pointer to input data */
xdouble *odata;         /* pointer to output data */
long    RowCnt;         /* row counter */
long    ColCnt;         /* col counter */
xdouble T0;             /* data storage */
xdouble T1;             /* data storage */
xdouble T2;             /* data storage */
xdouble T3;             /* data storage */
xdouble T4;             /* data storage */
xdouble T5;             /* data storage */
xdouble T6;             /* data storage */
xdouble T7;             /* data storage */
const long inRsizd1 = iRsiz;
const long inRsizd2 = 2*iRsiz;
const long inRsizd3 = inRsizd2+iRsiz;
const long inRsizd4 = 4*iRsiz;
const long inRsizd5 = inRsizd3+inRsizd2;
const long inRsizd6 = inRsizd4+inRsizd2;
const long inRsizd7 = inRsizd4+inRsizd3;
const long inRsizd8 = 8*iRsiz;

ocol = outdata;
irow = indata;
for (RowCnt=Nrows/8; RowCnt>0; RowCnt--){
        idata = irow;
        odata = ocol;
        for (ColCnt=Ncols; ColCnt>0; ColCnt--){
                T0 = *idata;
                T1 = *(idata+inRsizd1);
                T2 = *(idata+inRsizd2);
                T3 = *(idata+inRsizd3);
                T4 = *(idata+inRsizd4);
                T5 = *(idata+inRsizd5);
                T6 = *(idata+inRsizd6);
                T7 = *(idata+inRsizd7);
                *odata = T0;
                *(odata+1) = T1;
                *(odata+2) = T2;
                *(odata+3) = T3;
                *(odata+4) = T4;
                *(odata+5) = T5;
                *(odata+6) = T6;
                *(odata+7) = T7;
                idata++;
                odata += oRsiz;
        }
        irow += inRsizd8;
        ocol += 8;
}
if (Nrows%8 != 0){
        for (ColCnt=Ncols; ColCnt>0; ColCnt--){
                idata = irow++;
                odata = ocol;
                ocol += oRsiz;
                for (RowCnt=Nrows%8; RowCnt>0; RowCnt--){
                        T0 = *idata;
                        *odata++ = T0;
                        idata += iRsiz;
                }
        }
}
}

--- NEW FILE: fftext.h ---
/*******************************************************************
        This file extends the fftlib with calls to maintain the cosine and bit 
reversed tables
        for you (including mallocs and free's).  Call the init routine for each 
fft size you will
        be using.  Then you can call the fft routines below which will make the 
fftlib library 
        call with the appropriate tables passed.  When you are done with all 
fft's you can call 
        fftfree to release the storage for the tables.  Note that you can call 
fftinit repeatedly
        with the same size, the extra calls will be ignored. So, you could make 
a macro to call
        fftInit every time you call ffts. For example you could have someting 
like:
        #define FFT(a,n) if(!fftInit(roundtol(LOG2(n)))) 
ffts(a,roundtol(LOG2(n)),1);else printf("fft error\n");
*******************************************************************/

int fftInit(long M);
// malloc and init cosine and bit reversed tables for a given size fft, ifft, 
rfft, rifft
/* INPUTS */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* OUTPUTS */
/* private cosine and bit reversed tables       */

void fftFree();
// release storage for all private cosine and bit reversed tables

void ffts(float *data, long M, long Rows);
/* Compute in-place complex fft on the rows of the input array  */
/* INPUTS */
/* *ioptr = input data array    */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)       
*/
/* OUTPUTS */
/* *ioptr = output data array   */

void iffts(float *data, long M, long Rows);
/* Compute in-place inverse complex fft on the rows of the input array  */
/* INPUTS */
/* *ioptr = input data array    */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)       
*/
/* OUTPUTS */
/* *ioptr = output data array   */

void rffts(float *data, long M, long Rows);
/* Compute in-place real fft on the rows of the input array     */
/* The result is the complex spectra of the positive frequencies */
/* except the location for the first complex number contains the real */
/* values for DC and Nyquest */
/* See rspectprod for multiplying two of these spectra together- ex. for fast 
convolution */
/* INPUTS */
/* *ioptr = real input data array       */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)       
*/
/* OUTPUTS */
/* *ioptr = output data array   in the following order */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... 
Re(x[N/2-1]), Im(x[N/2-1]). */

void riffts(float *data, long M, long Rows);
/* Compute in-place real ifft on the rows of the input array    */
/* data order as from rffts */
/* INPUTS */
/* *ioptr = input data array in the following order     */
/* M = log2 of fft size (ex M=10 for 1024 point fft) */
/* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... 
Re(x[N/2-1]), Im(x[N/2-1]). */
/* Rows = number of rows in ioptr array (use 1 for Rows for a single fft)       
*/
/* OUTPUTS */
/* *ioptr = real output data array      */

void rspectprod(float *data1, float *data2, float *outdata, long N);
// When multiplying a pair of spectra from rfft care must be taken to multiply 
the
// two real values seperately from the complex ones. This routine does it 
correctly.
// the result can be stored in-place over one of the inputs
/* INPUTS */
/* *data1 = input data array    first spectra */
/* *data2 = input data array    second spectra */
/* N = fft input size for both data1 and data2 */
/* OUTPUTS */
/* *outdata = output data array spectra */


// The following is FYI


//Note that most of the fft routines require full matrices, ie Rsiz==Ncols
//This is how I like to define a real matrix:
//struct matrix {               // real matrix
//      float *d;               // pointer to data
//      long Nrows;             // number of rows in the matrix
//      long Ncols;             // number of columns in the matrix (can be less 
than Rsiz)
//      long Rsiz;              // number of floats from one row to the next
//};
//typedef struct matrix matrix;



// CACHEFILLMALLOC and CEILCACHELINE can be used instead of malloc to make
// arrays that start exactly on a cache line start.
// First we CACHEFILLMALLOC a void * (use this void * when free'ing),
// then we set our array pointer equal to the properly cast CEILCACHELINE of 
this void *
// example:
// aInit = CACHEFILLMALLOC( NUMFLOATS*sizeof(float) );
// a = (float *) CEILCACHELINE(ainit);
// ... main body of code ...
// free(aInit);
//
// To disable this alignment, set CACHELINESIZE to 1
//#define CACHELINESIZE 32                              // Bytes per cache line
//#define CACHELINEFILL (CACHELINESIZE-1)
//#define CEILCACHELINE(p) ((((unsigned 
long)p+CACHELINEFILL)/CACHELINESIZE)*CACHELINESIZE)
//#define CACHEFILLMALLOC(n) malloc((n)+CACHELINEFILL)

--- NEW FILE: matlib.c ---
/* a few routines from a vector/matrix library */
#include "matlib.h"

void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, 
long Ncols){
/* not in-place matrix transpose        */
/* INPUTS */
/* *indata = input data array   */
/* iRsiz = offset to between rows of input data array   */
/* oRsiz = offset to between rows of output data array  */
/* Nrows = number of rows in input data array   */
/* Ncols = number of columns in input data array        */
/* OUTPUTS */
/* *outdata = output data array */

float   *irow;          /* pointer to input row start */
float   *ocol;          /* pointer to output col start */
float   *idata;         /* pointer to input data */
float   *odata;         /* pointer to output data */
long    RowCnt;         /* row counter */
long    ColCnt;         /* col counter */
float   T0;             /* data storage */
float   T1;             /* data storage */
float   T2;             /* data storage */
float   T3;             /* data storage */
float   T4;             /* data storage */
float   T5;             /* data storage */
float   T6;             /* data storage */
float   T7;             /* data storage */
const long inRsizd1 = iRsiz;
const long inRsizd2 = 2*iRsiz;
const long inRsizd3 = inRsizd2+iRsiz;
const long inRsizd4 = 4*iRsiz;
const long inRsizd5 = inRsizd3+inRsizd2;
const long inRsizd6 = inRsizd4+inRsizd2;
const long inRsizd7 = inRsizd4+inRsizd3;
const long inRsizd8 = 8*iRsiz;

ocol = outdata;
irow = indata;
for (RowCnt=Nrows/8; RowCnt>0; RowCnt--){
        idata = irow;
        odata = ocol;
        for (ColCnt=Ncols; ColCnt>0; ColCnt--){
                T0 = *idata;
                T1 = *(idata+inRsizd1);
                T2 = *(idata+inRsizd2);
                T3 = *(idata+inRsizd3);
                T4 = *(idata+inRsizd4);
                T5 = *(idata+inRsizd5);
                T6 = *(idata+inRsizd6);
                T7 = *(idata+inRsizd7);
                *odata = T0;
                *(odata+1) = T1;
                *(odata+2) = T2;
                *(odata+3) = T3;
                *(odata+4) = T4;
                *(odata+5) = T5;
                *(odata+6) = T6;
                *(odata+7) = T7;
                idata++;
                odata += oRsiz;
        }
        irow += inRsizd8;
        ocol += 8;
}
if (Nrows%8 != 0){
        for (ColCnt=Ncols; ColCnt>0; ColCnt--){
                idata = irow++;
                odata = ocol;
                ocol += oRsiz;
                for (RowCnt=Nrows%8; RowCnt>0; RowCnt--){
                        T0 = *idata;
                        *odata++ = T0;
                        idata += iRsiz;
                }
        }
}
}

void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, 
long Ncols){
/* not in-place complex float matrix transpose  */
/* INPUTS */
/* *indata = input data array   */
/* iRsiz = offset to between rows of input data array   */
/* oRsiz = offset to between rows of output data array  */
/* Nrows = number of rows in input data array   */
/* Ncols = number of columns in input data array        */
/* OUTPUTS */
/* *outdata = output data array */

float   *irow;          /* pointer to input row start */
float   *ocol;          /* pointer to output col start */
float   *idata;         /* pointer to input data */
float   *odata;         /* pointer to output data */
long    RowCnt;         /* row counter */
long    ColCnt;         /* col counter */
float   T0r;            /* data storage */
float   T0i;            /* data storage */
float   T1r;            /* data storage */
float   T1i;            /* data storage */
float   T2r;            /* data storage */
float   T2i;            /* data storage */
float   T3r;            /* data storage */
float   T3i;            /* data storage */
const long inRsizd1 = 2*iRsiz;
const long inRsizd1i = 2*iRsiz + 1;
const long inRsizd2 = 4*iRsiz;
const long inRsizd2i = 4*iRsiz + 1;
const long inRsizd3 = inRsizd2+inRsizd1;
const long inRsizd3i = inRsizd2+inRsizd1 + 1;
const long inRsizd4 = 8*iRsiz;

ocol = outdata;
irow = indata;
for (RowCnt=Nrows/4; RowCnt>0; RowCnt--){
        idata = irow;
        odata = ocol;
        for (ColCnt=Ncols; ColCnt>0; ColCnt--){
                T0r = *idata;
                T0i = *(idata +1);
                T1r = *(idata+inRsizd1);
                T1i = *(idata+inRsizd1i);
                T2r = *(idata+inRsizd2);
                T2i = *(idata+inRsizd2i);
                T3r = *(idata+inRsizd3);
                T3i = *(idata+inRsizd3i);
                *odata = T0r;
                *(odata+1) = T0i;
                *(odata+2) = T1r;
                *(odata+3) = T1i;
                *(odata+4) = T2r;
                *(odata+5) = T2i;
                *(odata+6) = T3r;
                *(odata+7) = T3i;
                idata+=2;
                odata += 2*oRsiz;
        }
        irow += inRsizd4;
        ocol += 8;
}
if (Nrows%4 != 0){
        for (ColCnt=Ncols; ColCnt>0; ColCnt--){
                idata = irow;
                odata = ocol;
                for (RowCnt=Nrows%4; RowCnt>0; RowCnt--){
                        T0r = *idata;
                        T0i = *(idata+1);
                        *odata = T0r;
                        *(odata+1) = T0i;
                        odata+=2;
                        idata += 2*iRsiz;
                }
                irow+=2;
                ocol += 2*oRsiz;
        }
}
}

void cvprod(float *a, float *b, float *out, long N){
/* complex vector product, can be in-place */
/* product of complex vector *a times complex vector *b */
/* INPUTS */
/* N vector length */
/* *a complex vector length N complex numbers */
/* *b complex vector length N complex numbers */
/* OUTPUTS */
/* *out complex vector length N */

long    OutCnt;         /* output counter */
float   A0R;            /* A storage */
float   A0I;            /* A storage */
float   A1R;            /* A storage */
float   A1I;            /* A storage */
float   A2R;            /* A storage */
float   A2I;            /* A storage */
float   A3R;            /* A storage */
float   A3I;            /* A storage */
float   B0R;            /* B storage */
float   B0I;            /* B storage */
float   B1R;            /* B storage */
float   B1I;            /* B storage */
float   B2R;            /* B storage */
float   B2I;            /* B storage */
float   B3R;            /* B storage */
float   B3I;            /* B storage */
float   T0R;            /* TMP storage */
float   T0I;            /* TMP storage */
float   T1R;            /* TMP storage */
float   T1I;            /* TMP storage */
float   T2R;            /* TMP storage */
float   T2I;            /* TMP storage */
float   T3R;            /* TMP storage */
float   T3I;            /* TMP storage */

if (N>=4){
        A0R = *a;
        B0R = *b;
        A0I = *(a +1);
        B0I = *(b +1);
        A1R = *(a +2);
        B1R = *(b +2);
        A1I = *(a +3);
        B1I = *(b +3);
        A2R = *(a +4);
        B2R = *(b +4);
        A2I = *(a +5);
        B2I = *(b +5);
        A3R = *(a +6);
        B3R = *(b +6);
        A3I = *(a +7);
        B3I = *(b +7);
        T0R = A0R * B0R;
        T0I = (A0R * B0I);
        T1R = A1R * B1R;
        T1I = (A1R * B1I);
        T2R = A2R * B2R;
        T2I = (A2R * B2I);
        T3R = A3R * B3R;
        T3I = (A3R * B3I);
        T0R -= (A0I * B0I);
        T0I = A0I * B0R + T0I;
        T1R -= (A1I * B1I);
        T1I = A1I * B1R + T1I;
        T2R -= (A2I * B2I);
        T2I = A2I * B2R + T2I;
        T3R -= (A3I * B3I);
        T3I = A3I * B3R + T3I;
        for (OutCnt=N/4-1; OutCnt > 0; OutCnt--){
                a += 8;
                b += 8;
                A0R = *a;
                B0R = *b;
                A0I = *(a +1);
                B0I = *(b +1);
                A1R = *(a +2);
                B1R = *(b +2);
                A1I = *(a +3);
                B1I = *(b +3);
                A2R = *(a +4);
                B2R = *(b +4);
                A2I = *(a +5);
                B2I = *(b +5);
                A3R = *(a +6);
                B3R = *(b +6);
                A3I = *(a +7);
                B3I = *(b +7);
                *out = T0R;
                *(out +1) = T0I;
                *(out +2) = T1R;
                *(out +3) = T1I;
                *(out +4) = T2R;
                *(out +5) = T2I;
                *(out +6) = T3R;
                *(out +7) = T3I;
                T0R = A0R * B0R;
                T0I = (A0R * B0I);
                T1R = A1R * B1R;
                T1I = (A1R * B1I);
                T2R = A2R * B2R;
                T2I = (A2R * B2I);
                T3R = A3R * B3R;
                T3I = (A3R * B3I);
                T0R -= (A0I * B0I);
                T0I = A0I * B0R + T0I;
                T1R -= (A1I * B1I);
                T1I = A1I * B1R + T1I;
                T2R -= (A2I * B2I);
                T2I = A2I * B2R + T2I;
                T3R -= (A3I * B3I);
                T3I = A3I * B3R + T3I;
                out += 8;
        }
        a += 8;
        b += 8;
        *out = T0R;
        *(out +1) = T0I;
        *(out +2) = T1R;
        *(out +3) = T1I;
        *(out +4) = T2R;
        *(out +5) = T2I;
        *(out +6) = T3R;
        *(out +7) = T3I;
        out += 8;
}
for (OutCnt=N%4; OutCnt > 0; OutCnt--){
        A0R = *a++;
        B0R = *b++;
        A0I = *a++;
        B0I = *b++;
        T0R = A0R * B0R;
        T0I = (A0R * B0I);
        T0R -= (A0I * B0I);
        T0I = A0I * B0R + T0I;
        *out++ = T0R;
        *out++ = T0I;
}
}

--- NEW FILE: files~ ---
dxpose.c
dxpose.h
fft2d.c
fft2d.h
fftext.c
fftext.h
fftlib.c
fftlib.h
files
matlib.c
matlib.h

--- NEW FILE: fft2d.c ---
/*******************************************************************
        This file extends the fftlib with 2d and 3d complex fft's and
        2d real fft's.  All fft's return results in-place.  Temporary buffers
        for transposing columns are maintained privately via calls to
        fft2dInit, fft2dFree, fft3dInit, and fft3dFree.
        Note that you can call fft2dInit and fft3dInit repeatedly
        with the same sizes, the extra calls will be ignored.
        So, you could make a macro to call fft2dInit every time you call fft2d.
        *** Warning *** fft2dFree and fft3dFree also call fftFree
        so you must re-init all 1d fft sizes you are going to continue using
*******************************************************************/
#include <stdlib.h>
#include <fp.h>
#include "fftlib.h"
#include "fftext.h"
#include "matlib.h"
#include "dxpose.h"
#include "fft2d.h"
        // use trick of using a real double transpose in place of a complex 
transpose if it fits
#define cxpose(a,b,c,d,e,f) (2*sizeof(float)==sizeof(xdouble)) ? 
dxpose((xdouble *)(a), b, (xdouble *)(c), d, e, f) : cxpose(a,b,c,d,e,f);
        // for this trick to work you must NOT replace the xdouble declarations 
in
        // dxpose with float declarations.


        // pointers for temporary storage for four columns
static float *Array2d[8*sizeof(long)] = {0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
                                                                        
0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0};
int fft2dInit(long M2, long M){
        // init for fft2d, ifft2d, rfft2d, and rifft2d
        // malloc storage for 4 columns of 2d ffts then call fftinit for both 
row and column ffts sizes
/* INPUTS */
/* M = log2 of number of columns */
/* M2 = log2 of number of rows */
/*       of 2d matrix to be fourier transformed */
/* OUTPUTS */
/* private storage for columns of 2d ffts       */
/* calls fftInit for cosine and bit reversed tables     */
int theError = 1;
if ((M2 >= 0) && (M2 < 8*sizeof(long))){
        theError = 0;
        if (Array2d[M2] == 0){
                Array2d[M2] = (float *) malloc( 4*2*POW2(M2)*sizeof(float) );
                if (Array2d[M2] == 0)
                        theError = 2;
                else{
                        theError = fftInit(M2);
                }
        }
        if (theError == 0)
                theError = fftInit(M);
}
return theError;
}

void fft2dFree(){
// free storage for columns of 2d ffts and call fftFree to free all BRLow and 
Utbl storage
long i1;
for (i1=8*sizeof(long)-1; i1>=0; i1--){
        if (Array2d[i1] != 0){
                free(Array2d[i1]);
                Array2d[i1] = 0;
        };
};
fftFree();
}

void fft2d(float *data, long M2, long M){
/* Compute 2D complex fft and return results in-place   */
/* INPUTS */
/* *data = input data array     */
/* M2 = log2 of fft size number of rows */
/* M = log2 of fft size number of columns */
/* OUTPUTS */
/* *data = output data array    */
long i1;
if((M2>0)&&(M>0)){
        ffts(data, M, POW2(M2));
        if (M>2)
                for (i1=0; i1<POW2(M); i1+=4){
                        cxpose(data + i1*2, POW2(M), Array2d[M2], POW2(M2), 
POW2(M2), 4);
                        ffts(Array2d[M2], M2, 4);
                        cxpose(Array2d[M2], POW2(M2), data + i1*2, POW2(M), 4, 
POW2(M2));
                }
        else{
                cxpose(data, POW2(M), Array2d[M2], POW2(M2), POW2(M2), POW2(M));
                ffts(Array2d[M2], M2, POW2(M));
                cxpose(Array2d[M2], POW2(M2), data, POW2(M), POW2(M), POW2(M2));
        }
}
else
        ffts(data, M2+M, 1);
}

void ifft2d(float *data, long M2, long M){
/* Compute 2D complex ifft and return results in-place  */
/* INPUTS */
/* *data = input data array     */
/* M2 = log2 of fft size number of rows */
/* M = log2 of fft size number of columns */
/* OUTPUTS */
/* *data = output data array    */
long i1;
if((M2>0)&&(M>0)){
        iffts(data, M, POW2(M2));
        if (M>2)
                for (i1=0; i1<POW2(M); i1+=4){
                        cxpose(data + i1*2, POW2(M), Array2d[M2], POW2(M2), 
POW2(M2), 4);
                        iffts(Array2d[M2], M2, 4);
                        cxpose(Array2d[M2], POW2(M2), data + i1*2, POW2(M), 4, 
POW2(M2));
                }
        else{
                cxpose(data, POW2(M), Array2d[M2], POW2(M2), POW2(M2), POW2(M));
                iffts(Array2d[M2], M2, POW2(M));
                cxpose(Array2d[M2], POW2(M2), data, POW2(M), POW2(M), POW2(M2));
        }
}
else
        iffts(data, M2+M, 1);
}

int fft3dInit(long L, long M2, long M){
        // init for fft3d, ifft3d
        // malloc storage for 4 columns and 4 pages of 3d ffts
        // then call fftinit for page, row and column ffts sizes
//* M = log2 of number of columns */
/* M2 = log2 of number of rows */
/* L = log2 of number of pages */
/*       of 3d matrix to be fourier transformed */
/* OUTPUTS */
/* private storage for columns and pages of 3d ffts     */
/* calls fftInit for cosine and bit reversed tables     */
int theError = 1;
if ((L >= 0) && (L < 8*sizeof(long))){
        theError = 0;
        if (Array2d[L] == 0){
                Array2d[L] = (float *) malloc( 4*2*POW2(L)*sizeof(float) );
                if (Array2d[L] == 0)
                        theError = 2;
                else{
                        theError = fftInit(L);
                }
        }
        if (theError == 0){
                if (Array2d[M2] == 0){
                        Array2d[M2] = (float *) malloc( 
4*2*POW2(M2)*sizeof(float) );
                        if (Array2d[M2] == 0)
                                theError = 2;
                        else{
                                theError = fftInit(M2);
                        }
                }
        }
        if (theError == 0)
                theError = fftInit(M);
}
return theError;
}

void fft3dFree(){
// free storage for columns of all 2d&3d ffts and call fftFree to free all 
BRLow and Utbl storage
fft2dFree();
}

void fft3d(float *data, long M3, long M2, long M){
/* Compute 2D complex fft and return results in-place   */
/* INPUTS */
/* *data = input data array     */
/* M3 = log2 of fft size number of pages */
/* M2 = log2 of fft size number of rows */
/* M = log2 of fft size number of columns */
/* OUTPUTS */
/* *data = output data array    */
long i1;
long i2;
const long N = POW2(M);
const long N2 = POW2(M2);
const long N3 = POW2(M3);
if((M3>0)&&(M2>0)&&(M>0)){
        ffts(data, M, N3*N2);
        if (M>2)
                for (i2=0; i2<N3; i2++){
                        for (i1=0; i1<N; i1+=4){
                                cxpose(data + i2*2*POW2(M2+M) + i1*2, N, 
Array2d[M2], N2, N2, 4);
                                ffts(Array2d[M2], M2, 4);
                                cxpose(Array2d[M2], N2, data + i2*2*POW2(M2+M) 
+ i1*2, N, 4, N2);
                        }
                }
        else{
                for (i2=0; i2<N3; i2++){
                        cxpose(data + i2*2*POW2(M2+M), N, Array2d[M2], N2, N2, 
N);
                        ffts(Array2d[M2], M2, N);
                        cxpose(Array2d[M2], N2, data + i2*2*POW2(M2+M), N, N, 
N2);
                }
        }
        if ((M2+M)>2)
                for (i1=0; i1<POW2(M2+M); i1+=4){
                        cxpose(data + i1*2, POW2(M2+M), Array2d[M3], N3, N3, 4);
                        ffts(Array2d[M3], M3, 4);
                        cxpose(Array2d[M3], N3, data + i1*2, POW2(M2+M), 4, N3);
                }
        else{
                cxpose(data, POW2(M2+M), Array2d[M3], N3, N3, POW2(M2+M));
                ffts(Array2d[M3], M3, POW2(M2+M));
                cxpose(Array2d[M3], N3, data, POW2(M2+M), POW2(M2+M), N3);
        }
}
else
        if(M3==0) fft2d(data, M2, M);
        else
                if(M2==0) fft2d(data, M3, M);
                else
                        if(M==0) fft2d(data, M3, M2);
}

void ifft3d(float *data, long M3, long M2, long M){
/* Compute 2D complex ifft and return results in-place  */
/* INPUTS */
/* *data = input data array     */
/* M3 = log2 of fft size number of pages */
/* M2 = log2 of fft size number of rows */
/* M = log2 of fft size number of columns */
/* OUTPUTS */
/* *data = output data array    */
long i1;
long i2;
const long N = POW2(M);
const long N2 = POW2(M2);
const long N3 = POW2(M3);
if((M3>0)&&(M2>0)&&(M>0)){
        iffts(data, M, N3*N2);
        if (M>2)
                for (i2=0; i2<N3; i2++){
                        for (i1=0; i1<N; i1+=4){
                                cxpose(data + i2*2*POW2(M2+M) + i1*2, N, 
Array2d[M2], N2, N2, 4);
                                iffts(Array2d[M2], M2, 4);
                                cxpose(Array2d[M2], N2, data + i2*2*POW2(M2+M) 
+ i1*2, N, 4, N2);
                        }
                }
        else{
                for (i2=0; i2<N3; i2++){
                        cxpose(data + i2*2*POW2(M2+M), N, Array2d[M2], N2, N2, 
N);
                        iffts(Array2d[M2], M2, N);
                        cxpose(Array2d[M2], N2, data + i2*2*POW2(M2+M), N, N, 
N2);
                }
        }
        if ((M2+M)>2)
                for (i1=0; i1<POW2(M2+M); i1+=4){
                        cxpose(data + i1*2, POW2(M2+M), Array2d[M3], N3, N3, 4);
                        iffts(Array2d[M3], M3, 4);
                        cxpose(Array2d[M3], N3, data + i1*2, POW2(M2+M), 4, N3);
                }
        else{
                cxpose(data, POW2(M2+M), Array2d[M3], N3, N3, POW2(M2+M));
                iffts(Array2d[M3], M3, POW2(M2+M));
                cxpose(Array2d[M3], N3, data, POW2(M2+M), POW2(M2+M), N3);
        }
}
else
        if(M3==0) ifft2d(data, M2, M);
        else
                if(M2==0) ifft2d(data, M3, M);
                else
                        if(M==0) ifft2d(data, M3, M2);
}

void rfft2d(float *data, long M2, long M){
/* Compute 2D real fft and return results in-place      */
/* First performs real fft on rows using size from M to compute positive 
frequencies */
/* then performs transform on columns using size from M2 to compute wavenumbers 
*/
/* If you think of the result as a complex pow(2,M2) by pow(2,M-1) matrix */
/* then the first column contains the positive wavenumber spectra of DC 
frequency */
/* followed by the positive wavenumber spectra of the nyquest frequency */
/* since these are two positive wavenumber spectra the first complex value */
/* of each is really the real values for the zero and nyquest wavenumber packed 
together */
/* All other columns contain the positive and negative wavenumber spectra of a 
positive frequency */
/* See rspect2dprod for multiplying two of these spectra together- ex. for fast 
convolution */
/* INPUTS */
/* *data = input data array     */
/* M2 = log2 of fft size number of rows in */
/* M = log2 of fft size number of columns in */
/* OUTPUTS */
/* *data = output data array    */
long i1;
if((M2>0)&&(M>0)){
        rffts(data, M, POW2(M2));
        if (M==1){
                cxpose(data, POW2(M)/2, Array2d[M2]+POW2(M2)*2, POW2(M2), 
POW2(M2), 1);
                xpose(Array2d[M2]+POW2(M2)*2, 2, Array2d[M2], POW2(M2), 
POW2(M2), 2);
                rffts(Array2d[M2], M2, 2);
                cxpose(Array2d[M2], POW2(M2), data, POW2(M)/2, 1, POW2(M2));
        }
        else if (M==2){
                cxpose(data, POW2(M)/2, Array2d[M2]+POW2(M2)*2, POW2(M2), 
POW2(M2), 1);
                xpose(Array2d[M2]+POW2(M2)*2, 2, Array2d[M2], POW2(M2), 
POW2(M2), 2);
                rffts(Array2d[M2], M2, 2);
                cxpose(Array2d[M2], POW2(M2), data, POW2(M)/2, 1, POW2(M2));

                cxpose(data + 2, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 1);
                ffts(Array2d[M2], M2, 1);
                cxpose(Array2d[M2], POW2(M2), data + 2, POW2(M)/2, 1, POW2(M2));
        }
        else{
                cxpose(data, POW2(M)/2, Array2d[M2]+POW2(M2)*2, POW2(M2), 
POW2(M2), 1);
                xpose(Array2d[M2]+POW2(M2)*2, 2, Array2d[M2], POW2(M2), 
POW2(M2), 2);
                rffts(Array2d[M2], M2, 2);
                cxpose(Array2d[M2], POW2(M2), data, POW2(M)/2, 1, POW2(M2));

                cxpose(data + 2, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 3);
                ffts(Array2d[M2], M2, 3);
                cxpose(Array2d[M2], POW2(M2), data + 2, POW2(M)/2, 3, POW2(M2));
                for (i1=4; i1<POW2(M)/2; i1+=4){
                        cxpose(data + i1*2, POW2(M)/2, Array2d[M2], POW2(M2), 
POW2(M2), 4);
                        ffts(Array2d[M2], M2, 4);
                        cxpose(Array2d[M2], POW2(M2), data + i1*2, POW2(M)/2, 
4, POW2(M2));
                }
        }
}
else
        rffts(data, M2+M, 1);
}

void rifft2d(float *data, long M2, long M){
/* Compute 2D real ifft and return results in-place     */
/* The input must be in the order as outout from rfft2d */
/* INPUTS */
/* *data = input data array     */
/* M2 = log2 of fft size number of rows out */
/* M = log2 of fft size number of columns out */
/* OUTPUTS */
/* *data = output data array    */
long i1;
if((M2>0)&&(M>0)){
        if (M==1){
                cxpose(data, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 1);
                riffts(Array2d[M2], M2, 2);
                xpose(Array2d[M2], POW2(M2), Array2d[M2]+POW2(M2)*2, 2, 2, 
POW2(M2));
                cxpose(Array2d[M2]+POW2(M2)*2, POW2(M2), data, POW2(M)/2, 1, 
POW2(M2));
        }
        else if (M==2){
                cxpose(data, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 1);
                riffts(Array2d[M2], M2, 2);
                xpose(Array2d[M2], POW2(M2), Array2d[M2]+POW2(M2)*2, 2, 2, 
POW2(M2)); 
                cxpose(Array2d[M2]+POW2(M2)*2, POW2(M2), data, POW2(M)/2, 1, 
POW2(M2));

                cxpose(data + 2, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 1);
                iffts(Array2d[M2], M2, 1);
                cxpose(Array2d[M2], POW2(M2), data + 2, POW2(M)/2, 1, POW2(M2));
        }
        else{
                cxpose(data, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 1);
                riffts(Array2d[M2], M2, 2);
                xpose(Array2d[M2], POW2(M2), Array2d[M2]+POW2(M2)*2, 2, 2, 
POW2(M2));
                cxpose(Array2d[M2]+POW2(M2)*2, POW2(M2), data, POW2(M)/2, 1, 
POW2(M2));

                cxpose(data + 2, POW2(M)/2, Array2d[M2], POW2(M2), POW2(M2), 3);
                iffts(Array2d[M2], M2, 3);
                cxpose(Array2d[M2], POW2(M2), data + 2, POW2(M)/2, 3, POW2(M2));
                for (i1=4; i1<POW2(M)/2; i1+=4){
                        cxpose(data + i1*2, POW2(M)/2, Array2d[M2], POW2(M2), 
POW2(M2), 4);
                        iffts(Array2d[M2], M2, 4);
                        cxpose(Array2d[M2], POW2(M2), data + i1*2, POW2(M)/2, 
4, POW2(M2));
                }
        }
        riffts(data, M, POW2(M2));
}
else
        riffts(data, M2+M, 1);
}

void rspect2dprod(float *data1, float *data2, float *outdata, long N2, long N1){
// When multiplying a pair of 2d spectra from rfft2d care must be taken to 
multiply the
// four real values seperately from the complex ones. This routine does it 
correctly.
// the result can be stored in-place over one of the inputs
/* *data1 = input data array    first spectra */
/* *data2 = input data array    second spectra */
/* N2 = fft size number of rows into rfft2d for both data1 and data2 */
/* N1 = fft size number of columns into rfft2d for both data1 and data2 */
long N = N2 * N1/2;             // number of "complex" numbers in spectra

if( (N2 > 1) && (N1>1)){
        outdata[0] = data1[0] * data2[0];       // multiply the zero freq, zero 
wavenumber values
        outdata[1] = data1[1] * data2[1];       // multiply the zero freq, 
nyquest wavenumber values

        cvprod(data1 + 2, data2 + 2, outdata + 2, N/2-1);

        outdata[N] = data1[N] * data2[N];               // multiply the nyquest 
freq, zero wavenumber values
        outdata[N+1] = data1[N+1] * data2[N+1]; // multiply the nyquest freq, 
nyquest wavenumber values

        cvprod(data1 + N+2, data2 + N+2, outdata + N+2, N/2-1);
}
else{   // really 1D rfft spectra
        N = N2 * N1;    // one of these is a 1
        if(N>1){
                outdata[0] = data1[0] * data2[0];       // multiply the zero 
freq values
                outdata[1] = data1[1] * data2[1];       // multiply the nyquest 
freq values
                cvprod(data1 + 2, data2 + 2, outdata + 2, N/2-1);       // 
multiply the other positive freq values
        }
        else{
                outdata[0] = data1[0] * data2[0];
        }
}
}
--- NEW FILE: fft2d.h ---
/*******************************************************************
        This file extends the fftlib with 2d and 3d complex fft's and
        2d real fft's.  All fft's return results in-place.  Temporary buffers
        for transposing columns are maintained privately via calls to
        fft2dInit, fft2dFree, fft3dInit, and fft3dFree.
        Note that you can call fft2dInit and fft3dInit repeatedly
        with the same sizes, the extra calls will be ignored.
        So, you could make a macro to call fft2dInit every time you call fft2d.
        *** Warning *** fft2dFree and fft3dFree also call fftFree
        so you must re-init all 1d fft sizes you are going to continue using
*******************************************************************/
int fft2dInit(long M2, long M);
        // init for fft2d, ifft2d, rfft2d, and rifft2d
        // malloc storage for columns of 2d ffts then call fftinit for both row 
and column ffts sizes
/* INPUTS */
/* M = log2 of number of columns */
/* M2 = log2 of number of rows */
/*       of 2d matrix to be fourier transformed */
/* OUTPUTS */
/* private storage for columns of 2d ffts       */
/* calls fftInit for cosine and bit reversed tables     */

void fft2dFree();
// free storage for columns of all 2d&3d ffts and call fftFree to free all 
BRLow and Utbl storage

void fft2d(float *data, long M2, long M);
/* Compute 2D complex fft and return results in-place   */
/* INPUTS */
/* *data = input data array     */
/* M2 = log2 of fft size number of rows */
/* M = log2 of fft size number of columns */
/* OUTPUTS */
/* *data = output data array    */

void ifft2d(float *data, long M2, long M);
/* Compute 2D complex ifft and return results in-place  */
/* INPUTS */
/* *data = input data array     */
/* M2 = log2 of fft size number of rows */
/* M = log2 of fft size number of columns */
/* OUTPUTS */
/* *data = output data array    */

int fft3dInit(long L, long M2, long M);
        // init for fft3d, ifft3d
        // malloc storage for 4 columns and 4 pages of 3d ffts
        // then call fftinit for page, row and column ffts sizes
/* M = log2 of number of columns */
/* M2 = log2 of number of rows */
/* L = log2 of number of pages */
/*       of 3d matrix to be fourier transformed */
/* OUTPUTS */
/* private storage for columns and pages of 3d ffts     */
/* calls fftInit for cosine and bit reversed tables     */

void fft3dFree();
// free storage for columns of all 2d&3d ffts and call fftFree to free all 
BRLow and Utbl storage

void fft3d(float *data, long M3, long M2, long M);
/* Compute 2D complex fft and return results in-place   */
/* INPUTS */
/* *data = input data array     */
/* M3 = log2 of fft size number of pages */
/* M2 = log2 of fft size number of rows */
/* M = log2 of fft size number of columns */
/* OUTPUTS */
/* *data = output data array    */

void ifft3d(float *data, long M3, long M2, long M);
/* Compute 2D complex ifft and return results in-place  */
/* INPUTS */
/* *data = input data array     */
/* M3 = log2 of fft size number of pages */
/* M2 = log2 of fft size number of rows */
/* M = log2 of fft size number of columns */
/* OUTPUTS */
/* *data = output data array    */

void rfft2d(float *data, long M2, long M);
/* Compute 2D real fft and return results in-place      */
/* First performs real fft on rows using size from M to compute positive 
frequencies */
/* then performs transform on columns using size from M2 to compute wavenumbers 
*/
/* If you think of the result as a complex pow(2,M2) by pow(2,M-1) matrix */
/* then the first column contains the positive wavenumber spectra of DC 
frequency */
/* followed by the positive wavenumber spectra of the nyquest frequency */
/* since these are two positive wavenumber spectra the first complex value */
/* of each is really the real values for the zero and nyquest wavenumber packed 
together */
/* All other columns contain the positive and negative wavenumber spectra of a 
positive frequency */
/* See rspect2dprod for multiplying two of these spectra together- ex. for fast 
convolution */
/* INPUTS */
/* *data = input data array     */
/* M2 = log2 of fft size number of rows in */
/* M = log2 of fft size number of columns in */
/* OUTPUTS */
/* *data = output data array    */

void rifft2d(float *data, long M2, long M);
/* Compute 2D real ifft and return results in-place     */
/* The input must be in the order as outout from rfft2d */
/* INPUTS */
/* *data = input data array     */
/* M2 = log2 of fft size number of rows out */
/* M = log2 of fft size number of columns out */
/* OUTPUTS */
/* *data = output data array    */

void rspect2dprod(float *data1, float *data2, float *outdata, long N2, long N1);
// When multiplying a pair of 2d spectra from rfft2d care must be taken to 
multiply the
// four real values seperately from the complex ones. This routine does it 
correctly.
// the result can be stored in-place over one of the inputs
/* *data1 = input data array    first spectra */
/* *data2 = input data array    second spectra */
/* N2 = fft size number of rows into rfft2d for both data1 and data2 */
/* N1 = fft size number of columns into rfft2d for both data1 and data2 */

--- NEW FILE: matlib.h ---
/* a few routines from a vector/matrix library */

void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, 
long Ncols);
/* not in-place matrix transpose        */
/* INPUTS */
/* *indata = input data array   */
/* iRsiz = offset to between rows of input data array   */
/* oRsiz = offset to between rows of output data array  */
/* Nrows = number of rows in input data array   */
/* Ncols = number of columns in input data array        */
/* OUTPUTS */
/* *outdata = output data array */

void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, 
long Ncols);
/* not in-place complex matrix transpose        */
/* INPUTS */
/* *indata = input data array   */
/* iRsiz = offset to between rows of input data array   */
/* oRsiz = offset to between rows of output data array  */
/* Nrows = number of rows in input data array   */
/* Ncols = number of columns in input data array        */
/* OUTPUTS */
/* *outdata = output data array */

void cvprod(float *a, float *b, float *out, long N);
/* complex vector product, can be in-place */
/* product of complex vector *a times complex vector *b */
/* INPUTS */
/* N vector length */
/* *a complex vector length N complex numbers */
/* *b complex vector length N complex numbers */
/* OUTPUTS */
/* *out complex vector length N */


------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
Audacity-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/audacity-cvs

Reply via email to