Josh Coalson <[EMAIL PROTECTED]> wrote: > thanks for the patch. No prob :)
> also, if you have miroslav's patch again a more updated version > of bitbuffer.c that would be great. I have been meaning to get > around to applying it for a long time. This is Miroslav's patch, from the mailing list post I dug up in the archives: --- orig/src/libFLAC/bitbuffer.c +++ mod/src/libFLAC/bitbuffer.c @@ -62,6 +62,24 @@ * keeping in mind the limit from the first paragraph. */ static const unsigned FLAC__BITBUFFER_DEFAULT_CAPACITY = ((65536 - 64) * 8) / FLAC__BITS_PER_BLURB; /* blurbs */ +static const unsigned char byte_to_unary_table[] = { + 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; #if FLAC__BITS_PER_BLURB == 8 #define FLAC__BITS_PER_BLURB_LOG2 3 @@ -70,6 +88,7 @@ #define FLAC__BLURB_TOP_BIT_ONE ((FLAC__byte)0x80) #define BLURB_BIT_TO_MASK(b) (((FLAC__blurb)'\x80') >> (b)) #define CRC16_UPDATE_BLURB(bb, blurb, crc) FLAC__CRC16_UPDATE((blurb), (crc)); +#define FLAC__ALIGNED_BLURB_UNARY(blurb) (byte_to_unary_table[blurb]) #elif FLAC__BITS_PER_BLURB == 32 #define FLAC__BITS_PER_BLURB_LOG2 5 #define FLAC__BYTES_PER_BLURB 4 @@ -77,6 +96,13 @@ #define FLAC__BLURB_TOP_BIT_ONE ((FLAC__uint32)0x80000000) #define BLURB_BIT_TO_MASK(b) (((FLAC__blurb)0x80000000) >> (b)) #define CRC16_UPDATE_BLURB(bb, blurb, crc) crc16_update_blurb((bb), (blurb)); +#define FLAC__ALIGNED_BLURB_UNARY(blurb) ((blurb) <= 0xff \ + ? byte_to_unary_table[blurb] + 24 \ + : ((blurb) <= 0xffff \ + ? byte_to_unary_table[(blurb) >> 8] + 16 \ + : ((blurb) <=0xffffff \ + ? byte_to_unary_table[(blurb) >> 16] + 8 \ + : byte_to_unary_table[(blurb) >> 24]))) #else /* ERROR, only sizes of 8 and 32 are supported */ #endif @@ -2109,114 +2135,18 @@ if(nvals == 0) return true; + cbits = bb->consumed_bits; i = bb->consumed_blurbs; - /* - * We unroll the main loop to take care of partially consumed blurbs here. - */ - if(bb->consumed_bits > 0) { - save_blurb = blurb = buffer[i]; - cbits = bb->consumed_bits; - blurb <<= cbits; - - while(1) { - if(state == 0) { - if(blurb) { - for(j = 0; !(blurb & FLAC__BLURB_TOP_BIT_ONE); j++) - blurb <<= 1; - msbs += j; - - /* dispose of the unary end bit */ - blurb <<= 1; - j++; - cbits += j; - - uval = 0; - lsbs_left = parameter; - state++; - if(cbits == FLAC__BITS_PER_BLURB) { - cbits = 0; - CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16); - break; - } - } - else { - msbs += FLAC__BITS_PER_BLURB - cbits; - cbits = 0; - CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16); - break; - } - } - else { - const unsigned available_bits = FLAC__BITS_PER_BLURB - cbits; - if(lsbs_left >= available_bits) { - uval <<= available_bits; - uval |= (blurb >> cbits); - cbits = 0; - CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16); - - if(lsbs_left == available_bits) { - /* compose the value */ - uval |= (msbs << parameter); - if(uval & 1) - vals[val_i++] = -((int)(uval >> 1)) - 1; - else - vals[val_i++] = (int)(uval >> 1); - if(val_i == nvals) - break; - - msbs = 0; - state = 0; - } - - lsbs_left -= available_bits; - break; - } - else { - uval <<= lsbs_left; - uval |= (blurb >> (FLAC__BITS_PER_BLURB - lsbs_left)); - blurb <<= lsbs_left; - cbits += lsbs_left; - - /* compose the value */ - uval |= (msbs << parameter); - if(uval & 1) - vals[val_i++] = -((int)(uval >> 1)) - 1; - else - vals[val_i++] = (int)(uval >> 1); - if(val_i == nvals) { - /* back up one if we exited the for loop because we read all nvals but the end came in the middle of a blurb */ - i--; - break; - } - - msbs = 0; - state = 0; - } - } - } - i++; - - bb->consumed_blurbs = i; - bb->consumed_bits = cbits; - bb->total_consumed_bits = (i << FLAC__BITS_PER_BLURB_LOG2) | cbits; - } - - /* - * Now that we are blurb-aligned the logic is slightly simpler - */ + while(val_i < nvals) { - for( ; i < bb->blurbs && val_i < nvals; i++) { - save_blurb = blurb = buffer[i]; - cbits = 0; + for( ; i < bb->blurbs; i++) { + blurb = (save_blurb = buffer[i]) << cbits; while(1) { if(state == 0) { if(blurb) { - for(j = 0; !(blurb & FLAC__BLURB_TOP_BIT_ONE); j++) - blurb <<= 1; + j = FLAC__ALIGNED_BLURB_UNARY(blurb); msbs += j; - /* dispose of the unary end bit */ - blurb <<= 1; j++; cbits += j; @@ -2228,6 +2158,7 @@ CRC16_UPDATE_BLURB(bb, save_blurb, bb->read_crc16); break; } + blurb <<= j; } else { msbs += FLAC__BITS_PER_BLURB - cbits; @@ -2247,12 +2178,11 @@ if(lsbs_left == available_bits) { /* compose the value */ uval |= (msbs << parameter); - if(uval & 1) - vals[val_i++] = -((int)(uval >> 1)) - 1; - else - vals[val_i++] = (int)(uval >> 1); - if(val_i == nvals) - break; + vals[val_i++] = (int)(uval >> 1 ^ -(int)(uval & 1)); + if(val_i == nvals) { + i++; + goto break2; + } msbs = 0; state = 0; @@ -2262,29 +2192,23 @@ break; } else { + cbits += lsbs_left; uval <<= lsbs_left; uval |= (blurb >> (FLAC__BITS_PER_BLURB - lsbs_left)); blurb <<= lsbs_left; - cbits += lsbs_left; /* compose the value */ uval |= (msbs << parameter); - if(uval & 1) - vals[val_i++] = -((int)(uval >> 1)) - 1; - else - vals[val_i++] = (int)(uval >> 1); - if(val_i == nvals) { - /* back up one if we exited the for loop because we read all nvals but the end came in the middle of a blurb */ - i--; - break; - } - + vals[val_i++] = (int)(uval >> 1 ^ -(int)(uval & 1)); + if(val_i == nvals) + goto break2; msbs = 0; state = 0; } } } } +break2: bb->consumed_blurbs = i; bb->consumed_bits = cbits; bb->total_consumed_bits = (i << FLAC__BITS_PER_BLURB_LOG2) | cbits; This is my patch on top of Miroslavs, which micro optimizes away some indexing and comparison overhead. --- orig/src/libFLAC/bitbuffer.c +++ mod/src/libFLAC/bitbuffer.c @@ -2123,7 +2123,7 @@ { const FLAC__blurb *buffer = bb->buffer; - unsigned i, j, val_i = 0; + unsigned i, j, val_i = nvals; unsigned cbits = 0, uval = 0, msbs = 0, lsbs_left = 0; FLAC__blurb blurb, save_blurb; unsigned state = 0; /* 0 = getting unary MSBs, 1 = getting binary LSBs */ @@ -2138,7 +2138,7 @@ cbits = bb->consumed_bits; i = bb->consumed_blurbs; - while(val_i < nvals) { + while(val_i != 0) { for( ; i < bb->blurbs; i++) { blurb = (save_blurb = buffer[i]) << cbits; while(1) { @@ -2178,11 +2178,13 @@ if(lsbs_left == available_bits) { /* compose the value */ uval |= (msbs << parameter); - vals[val_i++] = (int)(uval >> 1 ^ -(int)(uval & 1)); - if(val_i == nvals) { + *vals = (int)(uval >> 1 ^ -(int)(uval & 1)); + --val_i; + if(val_i == 0) { i++; goto break2; } + *(++vals); msbs = 0; state = 0; @@ -2199,9 +2201,12 @@ /* compose the value */ uval |= (msbs << parameter); - vals[val_i++] = (int)(uval >> 1 ^ -(int)(uval & 1)); - if(val_i == nvals) + *vals = (int)(uval >> 1 ^ -(int)(uval & 1)); + --val_i; + if(val_i == 0) goto break2; + *(++vals); + msbs = 0; state = 0; } @@ -2212,7 +2217,7 @@ bb->consumed_blurbs = i; bb->consumed_bits = cbits; bb->total_consumed_bits = (i << FLAC__BITS_PER_BLURB_LOG2) | cbits; - if(val_i < nvals) { + if(val_i > 0) { if(!bitbuffer_read_from_client_(bb, read_callback, client_data)) return false; /* these must be zero because we can only get here if we got to the end of the buffer */ > > btw how are you playing it on the ipod? With a modified version of Music Player Daemon (MPD, www.musicpd.org). http://www.ipodlinux.org/MPD has more info about it. > not sure how to help out with lpc_restore_signal(). there are > x86 and PPC versions of the routines in CVS that might be good > references. it is basically a multiply-accumulate loop but you > have to be careful about overflow. an ARM7 version of this > function would help the speed on several other FLAC-supported > devices. This is my current C version of lpc_restore_signal. It only works when (order<=8) (not a problem for me right now), but the inner loop for larger orders could be done using Duff's device and still be fast without getting much bigger. I reading up on ARM assembly right now. void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]) { unsigned i; FLAC__int32 sum; const FLAC__int32 *history, *qlp; const int tmp = (0 - order - 1); for(i = data_len; i != 0; --i) { sum = 0; qlp = &qlp_coeff[order]; history = &data[tmp]; switch (order) { case 8: sum += (*(--qlp)) * (*(++history)); case 7: sum += (*(--qlp)) * (*(++history)); case 6: sum += (*(--qlp)) * (*(++history)); case 5: sum += (*(--qlp)) * (*(++history)); case 4: sum += (*(--qlp)) * (*(++history)); case 3: sum += (*(--qlp)) * (*(++history)); case 2: sum += (*(--qlp)) * (*(++history)); case 1: sum += (*(--qlp)) * (*(++history)); break; } *(data++) = *(residual++) + (sum >> lp_quantization); } } -- Eric Wong
signature.asc
Description: Digital signature
_______________________________________________ Flac-dev mailing list Flac-dev@xiph.org http://lists.xiph.org/mailman/listinfo/flac-dev