[Mixxx-devel] Equalizer code

Andreas Pflug Tue, 19 May 2009 16:03:42 -0700

This is a kind of FYI-posting, summing up findings and code that I produced.

I've been investigating the enginefilterbutterworth8 code because the EQ doesn't sound as expected, and it consumes a lot of CPU (as known).

The reason for the sound is that engFilButterworth works (in terms of sound equalizing) radically differently than expected; it's basically a frequency-dividing network with 8th-order Butterworth filters (known for phase and group delay problems) giving three channels that are summed/mixed. No surprise that 4 filters of 8th order consume a lot of cycles...

A look at the actual code made me wonder why the calculation is implemented in double precision, while CSAMPLE is float, and there's an ugly memmove per step.

Attached you can find my drop-in replacement code. There are two versions in the file: the first, a C implementation, is basically the current code rewritten, omitting the memmove per step and using float; it should be runnable on any platform.

The second version is an SSE implementation that works on gcc 4.3.3 (64-bit, probably identical on 32-bit). It assumes parameters in xmm0, rdi and rsi.

There's quite some optimization potential, because xxxPS ops could calculate 4 values in parallel, while currently xxxSS ops calculate a single value.


Regards,
Andreas

#include "filterstuff.h"


/*
 * SSE building blocks for the filter functions (GCC extended asm, x86).
 *
 * The code as first posted relied on the calling convention leaving the
 * arguments untouched in their registers:
 *
 *    float assumptions(const float *coef, float *buf, register float val)
 *    {
 *       val is in xmm0
 *       buf is in rsi
 *       coef is in rdi
 *
 *       return value in xmm0
 *    }
 *
 * That only holds by accident (no optimisation, frame pointer present), so
 * the macros now name their operands explicitly and let the compiler pick
 * the registers.  They expect three identifiers to be in scope where they
 * are expanded:
 *
 *    coef - const float *, coefficient array
 *    buf  - float *, filter state array
 *    val  - float, running sample value (read and updated)
 *
 * Scratch registers used inside a block (declared as clobbers):
 *    xmm1 - iir and tmp
 *    xmm2 - b0
 *    xmm3 - b1
 *
 * Each macro carries its own terminating ';' so it is invoked without one,
 * e.g. BLOCKADD("8").  The argument is a string holding the byte
 * displacement of the stage: its two state floats live at buf+i and
 * buf+i+4, its two coefficients at coef+i+4 and coef+i+8.
 *
 * One stage computes
 *    iir = val - c1*b0 - c2*b1
 *    val = iir + b0 + b1 + b1      (BLOCKADD)
 *    val = iir + b0 - b1 - b1      (BLOCKSUB)
 *    b0  = b1;  b1 = iir
 */

#define BLOCKADD(i)  __asm__ __volatile__(				\
"movss	" i "+0(%[state]), %%xmm2	\n\t"	/* b0  */	\
"movss	" i "+4(%[state]), %%xmm3	\n\t"	/* b1  */	\
"movss	" i "+4(%[taps]), %%xmm1	\n\t"	/* c1  */	\
"mulss	%%xmm2, %%xmm1			\n\t"			\
"subss	%%xmm1, %[acc]			\n\t"			\
"movss	" i "+8(%[taps]), %%xmm1	\n\t"	/* c2  */	\
"mulss	%%xmm3, %%xmm1			\n\t"			\
"subss	%%xmm1, %[acc]			\n\t"			\
"movaps	%[acc], %%xmm1			\n\t"	/* iir */	\
"addss	%%xmm2, %[acc]			\n\t"			\
"addss	%%xmm3, %[acc]			\n\t"			\
"addss	%%xmm3, %[acc]			\n\t"	/* val */	\
"movss	%%xmm3, " i "+0(%[state])	\n\t"			\
"movss	%%xmm1, " i "+4(%[state])	"			\
: [acc] "+x" (val)							\
: [taps] "r" (coef), [state] "r" (buf)				\
: "xmm1", "xmm2", "xmm3", "memory");

#define BLOCKSUB(i)  __asm__ __volatile__(				\
"movss	" i "+0(%[state]), %%xmm2	\n\t"	/* b0  */	\
"movss	" i "+4(%[state]), %%xmm3	\n\t"	/* b1  */	\
"movss	" i "+4(%[taps]), %%xmm1	\n\t"	/* c1  */	\
"mulss	%%xmm2, %%xmm1			\n\t"			\
"subss	%%xmm1, %[acc]			\n\t"			\
"movss	" i "+8(%[taps]), %%xmm1	\n\t"	/* c2  */	\
"mulss	%%xmm3, %%xmm1			\n\t"			\
"subss	%%xmm1, %[acc]			\n\t"			\
"movaps	%[acc], %%xmm1			\n\t"	/* iir */	\
"addss	%%xmm2, %[acc]			\n\t"			\
"subss	%%xmm3, %[acc]			\n\t"			\
"subss	%%xmm3, %[acc]			\n\t"	/* val */	\
"movss	%%xmm3, " i "+0(%[state])	\n\t"			\
"movss	%%xmm1, " i "+4(%[state])	"			\
: [acc] "+x" (val)							\
: [taps] "r" (coef), [state] "r" (buf)				\
: "xmm1", "xmm2", "xmm3", "memory");


/*
 * Final gain: val *= coef[0].  The function epilogue is left to the
 * compiler; the caller simply returns val afterwards.
 */
#define BLOCKEND	__asm__(					\
"mulss	%[gain], %[acc]"						\
: [acc] "+x" (val)							\
: [gain] "xm" (coef[0]));

				

#if 1

/*
 * doHighpass (SSE variant): pass one sample through four BLOCKSUB stages.
 *
 * coef - coefficient array; the stages address it at byte offsets 4..32
 *        (coef[1]..coef[8] with 4-byte floats) and BLOCKEND multiplies the
 *        running value by the float at offset 0 (coef[0]).
 * buf  - filter state; every stage loads and rewrites two adjacent floats,
 *        so the displacements "0", "8", "16", "24" cover buf[0]..buf[7].
 * val  - the input sample.
 *
 * The string handed to each macro is pasted into the asm text as a byte
 * displacement.  The macros end with their own ';', hence no semicolon
 * after the invocations.
 *
 * NOTE(review): if BLOCKEND finishes with its own "leave; ret", control
 * never reaches `return val;` and the result is handed back in xmm0 by the
 * asm itself -- confirm against the BLOCKEND definition in use.
 */
float doHighpass(const float *coef, float *buf, register float val)
{
   BLOCKSUB("0")
   BLOCKSUB("8")
   BLOCKSUB("16")
   BLOCKSUB("24")
   BLOCKEND
   
   return val;
}

/*
 * doLowpass (SSE variant): pass one sample through four BLOCKADD stages.
 *
 * coef - coefficient array; the stages address it at byte offsets 4..32
 *        (coef[1]..coef[8] with 4-byte floats) and BLOCKEND multiplies the
 *        running value by the float at offset 0 (coef[0]).
 * buf  - filter state; every stage loads and rewrites two adjacent floats,
 *        so the displacements "0", "8", "16", "24" cover buf[0]..buf[7].
 * val  - the input sample.
 *
 * Identical in shape to doHighpass except that every stage is BLOCKADD
 * (the newer state value is added twice instead of subtracted twice).
 * The macros end with their own ';', hence no semicolon after them.
 *
 * NOTE(review): if BLOCKEND finishes with its own "leave; ret", control
 * never reaches `return val;` and the result is handed back in xmm0 by the
 * asm itself -- confirm against the BLOCKEND definition in use.
 */
float doLowpass(const float *coef, float *buf, register float val)
{
   BLOCKADD("0")
   BLOCKADD("8")
   BLOCKADD("16")
   BLOCKADD("24")
   BLOCKEND
   
   return val;
}

/*
 * doBandpass (SSE variant): pass one sample through four BLOCKSUB stages
 * followed by four BLOCKADD stages.
 *
 * coef - coefficient array; the eight stages address it at byte offsets
 *        4..64 (coef[1]..coef[16] with 4-byte floats) and BLOCKEND
 *        multiplies the running value by the float at offset 0 (coef[0]).
 * buf  - filter state; every stage loads and rewrites two adjacent floats,
 *        so the displacements "0".."56" cover buf[0]..buf[15].
 * val  - the input sample.
 *
 * The string handed to each macro is pasted into the asm text as a byte
 * displacement.  The macros end with their own ';', hence no semicolon
 * after the invocations.
 *
 * NOTE(review): if BLOCKEND finishes with its own "leave; ret", control
 * never reaches `return val;` and the result is handed back in xmm0 by the
 * asm itself -- confirm against the BLOCKEND definition in use.
 */
float doBandpass(const float *coef, float *buf, register float val)
{
   /* first half: the four subtracting stages, state in buf[0]..buf[7] */
   BLOCKSUB("0")
   BLOCKSUB("8")
   BLOCKSUB("16")
   BLOCKSUB("24")
   /* second half: the four adding stages, state in buf[8]..buf[15] */
   BLOCKADD("32")
   BLOCKADD("40")
   BLOCKADD("48")
   BLOCKADD("56")
   BLOCKEND

   return val;
}

#else

/*
 * Portable C implementation of the filter cascades.
 *
 * Every filter is a chain of second-order stages.  A stage owns two state
 * floats (b0 = older, b1 = newer) and two coefficients (c1, c2):
 *
 *    iir = val - c1*b0 - c2*b1
 *    val = iir + b0 + b1 + b1      ("add" stage)
 *    val = iir + b0 - b1 - b1      ("sub" stage)
 *    b0  = b1;  b1 = iir
 *
 * The stages are plain static functions rather than statement macros so
 * that arguments are evaluated once, nothing is captured from the caller's
 * scope, and a call is a single statement wherever it is used.  The order
 * of the floating-point operations is kept exactly as written above.
 */

/* Number of second-order stages in one low-pass or high-pass cascade. */
enum { STAGES_PER_FILTER = 4 };

/*
 * Run `stages` consecutive "sub" stages.
 *
 * coef  - first coefficient of the first stage (two floats per stage)
 * state - first state float of the first stage (two floats per stage,
 *         updated in place)
 * val   - input sample
 *
 * Returns the output of the last stage (no gain applied).
 */
static float cascadeSub(const float *coef, float *state, float val, int stages)
{
   int n;

   for (n = 0; n < stages; n++) {
      const float b0 = state[2 * n];
      const float b1 = state[2 * n + 1];
      const float iir = val - coef[2 * n] * b0 - coef[2 * n + 1] * b1;

      val = iir + b0 - b1 - b1;
      state[2 * n] = b1;
      state[2 * n + 1] = iir;
   }

   return val;
}

/*
 * Run `stages` consecutive "add" stages.  Same contract as cascadeSub,
 * except that the newer state value is added twice instead of subtracted.
 */
static float cascadeAdd(const float *coef, float *state, float val, int stages)
{
   int n;

   for (n = 0; n < stages; n++) {
      const float b0 = state[2 * n];
      const float b1 = state[2 * n + 1];
      const float iir = val - coef[2 * n] * b0 - coef[2 * n + 1] * b1;

      val = iir + b0 + b1 + b1;
      state[2 * n] = b1;
      state[2 * n + 1] = iir;
   }

   return val;
}

/*
 * Band-pass: four "sub" stages followed by four "add" stages.
 *
 * coef - 17 floats: coef[0] is the output gain, coef[1..8] feed the "sub"
 *        stages, coef[9..16] feed the "add" stages.
 * buf  - 16 state floats, updated in place: buf[0..7] belong to the "sub"
 *        stages, buf[8..15] to the "add" stages.
 * val  - input sample.
 *
 * Returns the filtered sample.
 */
float doBandpass(const float *coef, float *buf, register float val)
{
   val = cascadeSub(coef + 1, buf, val, STAGES_PER_FILTER);
   val = cascadeAdd(coef + 1 + 2 * STAGES_PER_FILTER,
                    buf + 2 * STAGES_PER_FILTER,
                    val, STAGES_PER_FILTER);

   return val * coef[0];
}

/*
 * Low-pass: four "add" stages.
 *
 * coef - 9 floats: coef[0] is the output gain, coef[1..8] feed the stages.
 * buf  - 8 state floats, updated in place.
 * val  - input sample.
 *
 * Returns the filtered sample.
 */
float doLowpass(const float *coef, float *buf, register float val)
{
   val = cascadeAdd(coef + 1, buf, val, STAGES_PER_FILTER);

   return val * coef[0];
}

/*
 * High-pass: four "sub" stages.
 *
 * coef - 9 floats: coef[0] is the output gain, coef[1..8] feed the stages.
 * buf  - 8 state floats, updated in place.
 * val  - input sample.
 *
 * Returns the filtered sample.
 */
float doHighpass(const float *coef, float *buf, register float val)
{
   val = cascadeSub(coef + 1, buf, val, STAGES_PER_FILTER);

   return val * coef[0];
}
#endif

------------------------------------------------------------------------------
Crystal Reports - New Free Runtime and 30 Day Trial
Check out the new simplified licensing option that enables 
unlimited royalty-free distribution of the report engine 
for externally facing server and web deployment. 
http://p.sf.net/sfu/businessobjects

_______________________________________________
Mixxx-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mixxx-devel

[Mixxx-devel] Equalizer code

Reply via email to