OK, some new code. Attached are some of my experiments.

 - Code that uses split registers to process two samples per loop.

 - Code for processing interleaved stereo (as fast as the mono version)


TODO:

 - Implement the resample code in libDSP in E3DNow! asm


-- 
Jussi Laako <[EMAIL PROTECTED]>

/* Base buffer size; both sample buffers below hold twice this many floats. */
#define BUFF_LENGTH 8192
/* Chunk size the author guessed to be typical for audio mixing. Not referenced
 * by name in the code shown here; resample() hard-codes the same value, 2048,
 * as its loop count. */
#define BUFF_MIX 2048 //somewhat regular audio chunk size?


/* Source samples read by resample(). The factor of 2 presumably leaves room
 * for two interleaved channels -- TODO confirm. NOTE(review): volatile is
 * presumably there so the benchmark's memory traffic is not optimized away. */
volatile float src_buff[BUFF_LENGTH * 2]; //src float buffer
/* Destination (mix) buffer that resample() accumulates into. */
volatile float dst_buff[BUFF_LENGTH * 2]; //dst float buffer


/*
 * Resample-and-mix benchmark kernel (32-bit x86, GCC inline asm, 3DNow! and
 * extended 3DNow! instructions).
 *
 * Walks src_buff with a fractional position that starts at 0 and grows by
 * advance_f per output frame, linearly interpolates between the two source
 * frames around that position, scales the result by volume_f and accumulates
 * it into dst_buff. The active variant treats the buffers as interleaved
 * stereo: one frame is two floats (8 bytes) and both channels are processed
 * in the two lanes of one MMX register. 2048 output frames are produced per
 * call (the same value as BUFF_MIX).
 *
 * Takes no parameters and returns nothing; all input and output goes through
 * the file-level src_buff and dst_buff arrays.
 *
 * Changes relative to the first posted version of the stereo loop:
 *  - the interpolated frame is scaled by the volume (mm1); it used to be
 *    scaled by the advance step (mm2), leaving volume_f unused;
 *  - the slope is computed with pfsub (next - cur); pfsubr produced
 *    cur - next, the opposite sign from the reference C loop below;
 *  - pushal/popal are gone: the clobber list already tells the compiler which
 *    registers are destroyed, and pushal moved %esp before the "m" operands
 *    were read, which breaks them when they are addressed relative to %esp;
 *  - the loop label is a numeric local label, so the asm can be emitted more
 *    than once (inlining, cloning) without a duplicate-symbol error.
 *
 * The two disabled asm variants are kept verbatim for reference and have NOT
 * received these fixes.
 */
static void resample()
{

	float advance_f=0.3;
	float volume_f=0.2; // 1/5 of max volume

	volatile float *src_ptr=src_buff;
	volatile float *dst_ptr=dst_buff;
	
	/* this code does one sample/loop (disabled, kept for reference) */
	/*asm volatile (
		"pushal\n\t"
		"movl %0, %%eax\n\t"
		"movl %1, %%ebx\n\t"
		"movl $2048, %%ecx\n\t"
		"pxor %%mm0, %%mm0\n\t"
		"movd %2, %%mm2\n\t"
		"movd %3, %%mm3\n\t"

		"resampleloop:\n\t"

		"pf2id %%mm0, %%mm1\n\t"
		"movd %%mm1, %%edx\n\t"
		"shll $2, %%edx\n\t"
		"addl %%eax, %%edx\n\t"

		"movq (%%edx), %%mm4\n\t"
		"pswapd %%mm4, %%mm5\n\t"
		"pi2fd %%mm1, %%mm6\n\t"

		"pfsubr %%mm4, %%mm5\n\t"
		"pfsubr %%mm0, %%mm6\n\t"
		"pfmul %%mm5, %%mm6\n\t"
		"pfadd %%mm6, %%mm4\n\t"
		"pfmul %%mm2, %%mm4\n\t"

		"movd (%%ebx), %%mm7\n\t"
		"pfadd %%mm4, %%mm7\n\t"
		"movd %%mm7, (%%ebx)\n\t"
		"addl $4, %%ebx\n\t"

		"pfadd %%mm3, %%mm0\n\t"

		"decl %%ecx\n\t"
		"jnz resampleloop\n\t"

		"femms\n\t"
		"popal\n\t"
		:
		: "m" (src_ptr),
		  "m" (dst_ptr),
		  "m" (volume_f),
		  "m" (advance_f)
		: "eax", "ebx", "ecx", "edx", 
		  "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
		  "memory");*/

	/* this code does two samples/loop (disabled, kept for reference) */
	/*asm volatile (
		"pushal\n\t"
		"movl %0, %%eax\n\t"
		"movl %1, %%ebx\n\t"
		"movl $1024, %%ecx\n\t"
		"pxor %%mm0, %%mm0\n\t"
		"movd %2, %%mm1\n\t"
		"pswapd %%mm1, %%mm7\n\t"
		"pfadd %%mm7, %%mm1\n\t"
		"movd %3, %%mm2\n\t"
		"pfacc %%mm2, %%mm0\n\t"

		"resampleloop:\n\t"

		"pf2id %%mm0, %%mm3\n\t"

		"movd %%mm3, %%edx\n\t"
		"shll $2, %%edx\n\t"
		"addl %%eax, %%edx\n\t"
		"movd (%%edx), %%mm4\n\t"
		"addl $4, %%edx\n\t"
		"movd (%%edx), %%mm5\n\t"

		"pswapd %%mm3, %%mm6\n\t"
		
		"movd %%mm6, %%edx\n\t"
		"shll $2, %%edx\n\t"
		"addl %%eax, %%edx\n\t"
		"movd (%%edx), %%mm6\n\t"
		"pswapd %%mm6, %%mm7\n\t"
		"pfadd %%mm7, %%mm4\n\t"
		"addl $4, %%edx\n\t"
		"movd (%%edx), %%mm6\n\t"
		"pswapd %%mm6, %%mm7\n\t"
		"pfadd %%mm7, %%mm5\n\t"

		"pi2fd %%mm3, %%mm6\n\t"
		"pfsubr %%mm4, %%mm5\n\t"
		"pfsubr %%mm0, %%mm6\n\t"
		"pfmul %%mm5, %%mm6\n\t"
		"pfadd %%mm6, %%mm4\n\t"
		"pfmul %%mm2, %%mm4\n\t"

		"movq (%%ebx), %%mm7\n\t"
		"pfadd %%mm4, %%mm7\n\t"
		"movq %%mm7, (%%ebx)\n\t"
		"addl $8, %%ebx\n\t"

		"pfacc %%mm2, %%mm0\n\t"

		"decl %%ecx\n\t"
		"jnz resampleloop\n\t"

		"femms\n\t"
		"popal\n\t"
		:
		: "m" (src_ptr),
		  "m" (dst_ptr),
		  "m" (volume_f),
		  "m" (advance_f)
		: "eax", "ebx", "ecx", "edx", 
		  "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
		  "memory");*/

	/* this is for interleaved stereo */
	asm volatile (
		/* Setup: eax = source base, ebx = destination cursor,
		 * ecx = number of output frames still to produce. */
		"movl %0, %%eax\n\t"
		"movl %1, %%ebx\n\t"
		"movl $2048, %%ecx\n\t"
		/* mm0 = fractional source position, same value in both lanes. */
		"pxor %%mm0, %%mm0\n\t"
		/* mm1 = volume broadcast to both lanes: movd zeroes the high
		 * lane, pswapd moves the value there, pfadd merges the two. */
		"movd %2, %%mm1\n\t"
		"pswapd %%mm1, %%mm7\n\t"
		"pfadd %%mm7, %%mm1\n\t"
		/* mm2 = advance step broadcast to both lanes, same trick. */
		"movd %3, %%mm2\n\t"
		"pswapd %%mm2, %%mm7\n\t"
		"pfadd %%mm7, %%mm2\n\t"

		"1:\n\t"

		/* mm3 = integer part of the position (pf2id truncates). */
		"pf2id %%mm0, %%mm3\n\t"

		/* edx = address of source frame [pos]; 8 bytes per frame. */
		"movd %%mm3, %%edx\n\t"
		"shll $3, %%edx\n\t"
		"addl %%eax, %%edx\n\t"
		"movq (%%edx), %%mm4\n\t"       /* mm4 = frame[pos]     */
		"addl $8, %%edx\n\t"
		"movq (%%edx), %%mm5\n\t"       /* mm5 = frame[pos + 1] */

		"pi2fd %%mm3, %%mm6\n\t"        /* mm6 = (float)pos           */
		"pfsub %%mm4, %%mm5\n\t"        /* mm5 = next - cur           */
		"pfsubr %%mm0, %%mm6\n\t"       /* mm6 = position - pos       */
		"pfmul %%mm5, %%mm6\n\t"        /* mm6 = frac * (next - cur)  */
		"pfadd %%mm6, %%mm4\n\t"        /* mm4 = interpolated frame   */
		"pfmul %%mm1, %%mm4\n\t"        /* apply volume               */

		/* Mix: accumulate into the destination frame and step to the
		 * next one. */
		"movq (%%ebx), %%mm7\n\t"
		"pfadd %%mm4, %%mm7\n\t"
		"movq %%mm7, (%%ebx)\n\t"
		"addl $8, %%ebx\n\t"

		"pfadd %%mm2, %%mm0\n\t"        /* position += advance */

		"decl %%ecx\n\t"
		"jnz 1b\n\t"

		/* Leave MMX state so later x87 code works. */
		"femms\n\t"
		:
		: "m" (src_ptr),
		  "m" (dst_ptr),
		  "m" (volume_f),
		  "m" (advance_f)
		: "eax", "ebx", "ecx", "edx", 
		  "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7",
		  "memory");
	
    /* Reference C version of the mono loop (disabled). */
    /*while (todo--) {

	pos=float_to_int(counter);
	//pos=todo>>2;
	res=src_ptr[pos];
	res_next=src_ptr[pos+1];
        counter+=advance_f;

            //this line works perfect, since res and res_next contain 16 bits values
	res+= (res_next-res) * (counter-(float)pos);
	res*=volume_f; //thanks to the volume, now the value goes into the higher 16 bits

        *dst_ptr++ +=res;
    }*/

}


/* How many times main() invokes resample(). */
#define TEST_REPEAT 100000

/*
 * Benchmark driver: seeds the first BUFF_LENGTH entries of both sample
 * buffers with their own index, then calls resample() TEST_REPEAT times.
 * Always returns 0.
 */
int main() {
    int idx;
    int remaining;

    /* Seed source and destination with a simple ramp. */
    idx = 0;
    while (idx < BUFF_LENGTH) {
        src_buff[idx] = idx;
        dst_buff[idx] = idx;
        idx++;
    }

    /* Run the kernel repeatedly; the results are never inspected. */
    for (remaining = TEST_REPEAT; remaining > 0; remaining--) {
        resample();
    }

    return 0;
}

Reply via email to