Here's what I listed in that email. Merging doesn't appear to be necessary. If
you have any build problems, let me know.

Note that my detection code is Darwin-specific. It's a BSD call (sysctl()), so
a change to the platform-detection macros should enable it to work on other
BSDs. However, I don't know what that change would be, and I couldn't determine
any safe way to do the check on Linux, nor do I have any way to test anything
other than OS X. The AltiVec code itself should work on any platform with
AltiVec.

Regarding performance, command-line decoding will see a modest improvement, but
it's really bottlenecked by the MD5 checking (and I don't have a clue as to how
to optimize that). Real-time decoding should be improved substantially -- I've
been meaning to test that and will get back to you with the results.

--
Brady Patterson ([EMAIL PROTECTED])
RLRR LRLL RLLR LRRL RRLR LLRL

On Sun, 25 Jul 2004, Josh Coalson wrote:
> PS, Brady, do you still have these patches?  you don't have to
> make them current, just send them as-is, I can merge them.
Index: configure.in
===================================================================
RCS file: /cvsroot/flac/flac/configure.in,v
retrieving revision 1.82
diff -c -r1.82 configure.in
*** configure.in        19 May 2003 23:59:49 -0000      1.82
--- configure.in        25 Jul 2004 23:13:35 -0000
***************
*** 208,213 ****
--- 208,225 ----
  AC_DEFINE(FLAC__USE_3DNOW)
  fi
  
+ AC_ARG_ENABLE(altivec,
+ [  --disable-altivec              Disable Altivec optimizations],
+ [case "${enableval}" in
+       yes) use_altivec=true ;;
+       no)  use_altivec=false ;;
+       *) AC_MSG_ERROR(bad value ${enableval} for --enable-altivec) ;;
+ esac],[use_altivec=true])
+ AM_CONDITIONAL(FLaC__USE_ALTIVEC, test x$use_altivec = xtrue)
+ if test x$use_altivec = xtrue ; then
+ AC_DEFINE(FLAC__USE_ALTIVEC)
+ fi
+ 
  AC_ARG_ENABLE(local-xmms-plugin,
  [  --enable-local-xmms-plugin     Install XMMS plugin to ~/.xmms/Plugins instead of 
system location],
  [case "${enableval}" in
***************
*** 380,385 ****
--- 392,398 ----
  AH_TEMPLATE(FLAC__NO_ASM,  [define to disable use of assembly code])
  AH_TEMPLATE(FLAC__SSE_OS,  [define if your operating system supports SSE 
instructions])
  AH_TEMPLATE(FLAC__USE_3DNOW,  [define to enable use of 3Dnow! instructions])
+ AH_TEMPLATE(FLAC__USE_ALTIVEC,  [define to enable use of Altivec instructions])
  AH_TEMPLATE(ID3LIB_MAJOR,  [define to major version number of id3lib])
  AH_TEMPLATE(ID3LIB_MINOR,  [define to minor version number of id3lib])
  AH_TEMPLATE(ID3LIB_PATCH,  [define to patch level of id3lib])
***************
*** 389,394 ****
--- 402,408 ----
        src/Makefile \
        src/libFLAC/Makefile \
        src/libFLAC/ia32/Makefile \
+       src/libFLAC/ppc/Makefile \
        src/libFLAC/include/Makefile \
        src/libFLAC/include/private/Makefile \
        src/libFLAC/include/protected/Makefile \
Index: cpu.c
===================================================================
RCS file: /cvsroot/flac/flac/src/libFLAC/cpu.c,v
retrieving revision 1.14
diff -c -r1.14 cpu.c
*** cpu.c       31 Jan 2003 23:34:57 -0000      1.14
--- cpu.c       25 Jul 2004 23:16:52 -0000
***************
*** 37,42 ****
--- 37,50 ----
  #include <config.h>
  #endif
  
+ #if defined FLAC__CPU_PPC
+ #if !defined FLAC__NO_ASM
+ #if defined __APPLE__ && defined __MACH__
+ #include <sys/sysctl.h>
+ #endif /* __APPLE__ && __MACH__ */
+ #endif /* FLAC__NO_ASM */
+ #endif /* FLAC__CPU_PPC */
+ 
  const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000;
  const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000;
  const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000;
***************
*** 78,83 ****
--- 86,115 ----
  #else
        info->use_asm = false;
  #endif
+ #elif defined FLAC__CPU_PPC
+       info->type = FLAC__CPUINFO_TYPE_PPC;
+ #if !defined FLAC__NO_ASM
+       info->use_asm = true;
+ #ifdef FLAC__USE_ALTIVEC
+ #if defined __APPLE__ && defined __MACH__
+       {
+               int selectors[2] = { CTL_HW, HW_VECTORUNIT };
+               int result = 0;
+               size_t length = sizeof(result);
+               int error = sysctl(selectors, 2, &result, &length, 0, 0);
+ 
+               info->data.ppc.altivec = error==0 ? result!=0 : 0;
+       }
+ #else /* __APPLE__ && __MACH__ */
+       /* don't know of any other thread-safe way to check */
+       info->data.ppc.altivec = 0;
+ #endif /* __APPLE__ && __MACH__ */
+ #else /* FLAC__USE_ALTIVEC */
+       info->data.ppc.altivec = 0;
+ #endif /* FLAC__USE_ALTIVEC */
+ #else /* FLAC__NO_ASM */
+       info->use_asm = false;
+ #endif /* FLAC__NO_ASM */
  #else
        info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
        info->use_asm = false;
Index: stream_decoder.c
===================================================================
RCS file: /cvsroot/flac/flac/src/libFLAC/stream_decoder.c,v
retrieving revision 1.87
diff -c -r1.87 stream_decoder.c
*** stream_decoder.c    20 May 2003 00:01:50 -0000      1.87
--- stream_decoder.c    25 Jul 2004 23:17:39 -0000
***************
*** 101,110 ****
        void (*local_lpc_restore_signal)(const FLAC__int32 residual[], unsigned 
data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, 
FLAC__int32 data[]);
        void (*local_lpc_restore_signal_64bit)(const FLAC__int32 residual[], unsigned 
data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, 
FLAC__int32 data[]);
        void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned 
data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, 
FLAC__int32 data[]);
        void *client_data;
        FLAC__BitBuffer *input;
        FLAC__int32 *output[FLAC__MAX_CHANNELS];
!       FLAC__int32 *residual[FLAC__MAX_CHANNELS];
        FLAC__EntropyCodingMethod_PartitionedRiceContents 
partitioned_rice_contents[FLAC__MAX_CHANNELS];
        unsigned output_capacity, output_channels;
        FLAC__uint32 last_frame_number;
--- 101,111 ----
        void (*local_lpc_restore_signal)(const FLAC__int32 residual[], unsigned 
data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, 
FLAC__int32 data[]);
        void (*local_lpc_restore_signal_64bit)(const FLAC__int32 residual[], unsigned 
data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, 
FLAC__int32 data[]);
        void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned 
data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, 
FLAC__int32 data[]);
+       void (*local_lpc_restore_signal_16bit_order8)(const FLAC__int32 residual[], 
unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, 
FLAC__int32 data[]);
        void *client_data;
        FLAC__BitBuffer *input;
        FLAC__int32 *output[FLAC__MAX_CHANNELS];
!       FLAC__int32 *residual[FLAC__MAX_CHANNELS]; /* must add 15 and mask low 4 bits 
before using */
        FLAC__EntropyCodingMethod_PartitionedRiceContents 
partitioned_rice_contents[FLAC__MAX_CHANNELS];
        unsigned output_capacity, output_channels;
        FLAC__uint32 last_frame_number;
***************
*** 281,286 ****
--- 282,288 ----
        decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal;
        decoder->private_->local_lpc_restore_signal_64bit = 
FLAC__lpc_restore_signal_wide;
        decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
+       decoder->private_->local_lpc_restore_signal_16bit_order8 = 
FLAC__lpc_restore_signal;
        /* now override with asm where appropriate */
  #ifndef FLAC__NO_ASM
        if(decoder->private_->cpuinfo.use_asm) {
***************
*** 290,301 ****
--- 292,311 ----
                if(decoder->private_->cpuinfo.data.ia32.mmx) {
                        decoder->private_->local_lpc_restore_signal = 
FLAC__lpc_restore_signal_asm_ia32;
                        decoder->private_->local_lpc_restore_signal_16bit = 
FLAC__lpc_restore_signal_asm_ia32_mmx;
+                       decoder->private_->local_lpc_restore_signal_16bit_order8 = 
FLAC__lpc_restore_signal_asm_ia32_mmx;
                }
                else {
                        decoder->private_->local_lpc_restore_signal = 
FLAC__lpc_restore_signal_asm_ia32;
                        decoder->private_->local_lpc_restore_signal_16bit = 
FLAC__lpc_restore_signal_asm_ia32;
+                       decoder->private_->local_lpc_restore_signal_16bit_order8 = 
FLAC__lpc_restore_signal_asm_ia32;
                }
  #endif
+ #elif defined FLAC__CPU_PPC
+               FLAC__ASSERT(decoder->private_->cpuinfo.type == 
FLAC__CPUINFO_TYPE_PPC);
+               if(decoder->private_->cpuinfo.data.ppc.altivec) {
+                       decoder->private_->local_lpc_restore_signal_16bit = 
FLAC__lpc_restore_signal_asm_ppc_altivec_16;
+                       decoder->private_->local_lpc_restore_signal_16bit_order8 = 
FLAC__lpc_restore_signal_asm_ppc_altivec_16_order8;
+               }
  #endif
        }
  #endif
***************
*** 748,754 ****
                memset(tmp, 0, sizeof(FLAC__int32)*4);
                decoder->private_->output[i] = tmp + 4;
  
!               tmp = (FLAC__int32*)malloc(sizeof(FLAC__int32)*size);
                if(tmp == 0) {
                        decoder->protected_->state = 
FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
                        return false;
--- 758,766 ----
                memset(tmp, 0, sizeof(FLAC__int32)*4);
                decoder->private_->output[i] = tmp + 4;
  
!               /* need quadword alignment for vector optimizations: */
!     /* allocate extra 15 bytes; then must add 15 and mask low 4 bits before using */
!               tmp = (FLAC__int32*)malloc(sizeof(FLAC__int32)*size+15U);
                if(tmp == 0) {
                        decoder->protected_->state = 
FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
                        return false;
***************
*** 1809,1818 ****
        FLAC__int32 i32;
        FLAC__uint32 u32;
        unsigned u;
  
        decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_FIXED;
  
!       subframe->residual = decoder->private_->residual[channel];
        subframe->order = order;
  
        /* read warm-up samples */
--- 1821,1831 ----
        FLAC__int32 i32;
        FLAC__uint32 u32;
        unsigned u;
+       FLAC__int32 *residual = (FLAC__int32 
*)((long)decoder->private_->residual[channel]+15U & ~0xf);
  
        decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_FIXED;
  
!       subframe->residual = residual;
        subframe->order = order;
  
        /* read warm-up samples */
***************
*** 1841,1847 ****
        /* read residual */
        switch(subframe->entropy_coding_method.type) {
                case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
!                       if(!read_residual_partitioned_rice_(decoder, order, 
subframe->entropy_coding_method.data.partitioned_rice.order, 
&decoder->private_->partitioned_rice_contents[channel], 
decoder->private_->residual[channel]))
                                return false;
                        break;
                default:
--- 1854,1860 ----
        /* read residual */
        switch(subframe->entropy_coding_method.type) {
                case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
!                       if(!read_residual_partitioned_rice_(decoder, order, 
subframe->entropy_coding_method.data.partitioned_rice.order, 
&decoder->private_->partitioned_rice_contents[channel], residual))
                                return false;
                        break;
                default:
***************
*** 1850,1856 ****
  
        /* decode the subframe */
        memcpy(decoder->private_->output[channel], subframe->warmup, 
sizeof(FLAC__int32) * order);
!       FLAC__fixed_restore_signal(decoder->private_->residual[channel], 
decoder->private_->frame.header.blocksize-order, order, 
decoder->private_->output[channel]+order);
  
        return true;
  }
--- 1863,1869 ----
  
        /* decode the subframe */
        memcpy(decoder->private_->output[channel], subframe->warmup, 
sizeof(FLAC__int32) * order);
!       FLAC__fixed_restore_signal(residual, 
decoder->private_->frame.header.blocksize-order, order, 
decoder->private_->output[channel]+order);
  
        return true;
  }
***************
*** 1861,1870 ****
        FLAC__int32 i32;
        FLAC__uint32 u32;
        unsigned u;
  
        decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_LPC;
  
!       subframe->residual = decoder->private_->residual[channel];
        subframe->order = order;
  
        /* read warm-up samples */
--- 1874,1884 ----
        FLAC__int32 i32;
        FLAC__uint32 u32;
        unsigned u;
+       FLAC__int32 *residual = (FLAC__int32 
*)((long)decoder->private_->residual[channel]+15U & ~0xf);
  
        decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_LPC;
  
!       subframe->residual = residual;
        subframe->order = order;
  
        /* read warm-up samples */
***************
*** 1915,1921 ****
        /* read residual */
        switch(subframe->entropy_coding_method.type) {
                case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
!                       if(!read_residual_partitioned_rice_(decoder, order, 
subframe->entropy_coding_method.data.partitioned_rice.order, 
&decoder->private_->partitioned_rice_contents[channel], 
decoder->private_->residual[channel]))
                                return false;
                        break;
                default:
--- 1929,1935 ----
        /* read residual */
        switch(subframe->entropy_coding_method.type) {
                case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE:
!                       if(!read_residual_partitioned_rice_(decoder, order, 
subframe->entropy_coding_method.data.partitioned_rice.order, 
&decoder->private_->partitioned_rice_contents[channel], residual))
                                return false;
                        break;
                default:
***************
*** 1925,1936 ****
        /* decode the subframe */
        memcpy(decoder->private_->output[channel], subframe->warmup, 
sizeof(FLAC__int32) * order);
        if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32)
!               if(bps <= 16 && subframe->qlp_coeff_precision <= 16)
!                       
decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel],
 decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, 
subframe->quantization_level, decoder->private_->output[channel]+order);
                else
!                       
decoder->private_->local_lpc_restore_signal(decoder->private_->residual[channel], 
decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, 
subframe->quantization_level, decoder->private_->output[channel]+order);
        else
!               
decoder->private_->local_lpc_restore_signal_64bit(decoder->private_->residual[channel],
 decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, 
subframe->quantization_level, decoder->private_->output[channel]+order);
  
        return true;
  }
--- 1939,1954 ----
        /* decode the subframe */
        memcpy(decoder->private_->output[channel], subframe->warmup, 
sizeof(FLAC__int32) * order);
        if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32)
!               if(bps <= 16 && subframe->qlp_coeff_precision <= 16) {
!                       if(order <= 8)
!                               
decoder->private_->local_lpc_restore_signal_16bit_order8(residual, 
decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, 
subframe->quantization_level, decoder->private_->output[channel]+order);
!                       else
!                               
decoder->private_->local_lpc_restore_signal_16bit(residual, 
decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, 
subframe->quantization_level, decoder->private_->output[channel]+order);
!               }
                else
!                       decoder->private_->local_lpc_restore_signal(residual, 
decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, 
subframe->quantization_level, decoder->private_->output[channel]+order);
        else
!               decoder->private_->local_lpc_restore_signal_64bit(residual, 
decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, 
subframe->quantization_level, decoder->private_->output[channel]+order);
  
        return true;
  }
***************
*** 1938,1945 ****
  FLAC__bool read_subframe_verbatim_(FLAC__StreamDecoder *decoder, unsigned channel, 
unsigned bps)
  {
        FLAC__Subframe_Verbatim *subframe = 
&decoder->private_->frame.subframes[channel].data.verbatim;
!       FLAC__int32 x, *residual = decoder->private_->residual[channel];
        unsigned i;
  
        decoder->private_->frame.subframes[channel].type = 
FLAC__SUBFRAME_TYPE_VERBATIM;
  
--- 1956,1966 ----
  FLAC__bool read_subframe_verbatim_(FLAC__StreamDecoder *decoder, unsigned channel, 
unsigned bps)
  {
        FLAC__Subframe_Verbatim *subframe = 
&decoder->private_->frame.subframes[channel].data.verbatim;
!       FLAC__int32 x;
        unsigned i;
+       FLAC__int32 *residual = (FLAC__int32 
*)((long)decoder->private_->residual[channel]+15U & ~0xf);
+ 
+       FLAC__ASSERT((((long)residual) & 0xf) == 0);
  
        decoder->private_->frame.subframes[channel].type = 
FLAC__SUBFRAME_TYPE_VERBATIM;
  
***************
*** 1965,1970 ****
--- 1986,1993 ----
        const unsigned partitions = 1u << partition_order;
        const unsigned partition_samples = partition_order > 0? 
decoder->private_->frame.header.blocksize >> partition_order : 
decoder->private_->frame.header.blocksize - predictor_order;
  
+       FLAC__ASSERT((((long)residual) & 0xf) == 0);
+ 
        
if(!FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(partitioned_rice_contents,
 max(6, partition_order))) {
                decoder->protected_->state = 
FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
                return false;
Index: cpu.h
===================================================================
RCS file: /cvsroot/flac/flac/src/libFLAC/include/private/cpu.h,v
retrieving revision 1.11
diff -c -r1.11 cpu.h
*** cpu.h       31 Jan 2003 23:34:58 -0000      1.11
--- cpu.h       25 Jul 2004 23:15:40 -0000
***************
*** 40,45 ****
--- 40,46 ----
  
  typedef enum {
        FLAC__CPUINFO_TYPE_IA32,
+       FLAC__CPUINFO_TYPE_PPC,
        FLAC__CPUINFO_TYPE_UNKNOWN
  } FLAC__CPUInfo_Type;
  
***************
*** 54,59 ****
--- 55,64 ----
        FLAC__bool extmmx;
  } FLAC__CPUInfo_IA32;
  
+ typedef struct {
+       FLAC__bool altivec;
+ } FLAC__CPUInfo_PPC;
+ 
  extern const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV;
  extern const unsigned FLAC__CPUINFO_IA32_CPUID_MMX;
  extern const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR;
***************
*** 69,74 ****
--- 74,80 ----
        FLAC__CPUInfo_Type type;
        union {
                FLAC__CPUInfo_IA32 ia32;
+               FLAC__CPUInfo_PPC ppc;
        } data;
  } FLAC__CPUInfo;
  

Reply via email to