[FFmpeg-devel] [PATCH][GSoC] Implement floating point decoding in ALS

Umair Khan Thu, 21 Apr 2016 05:54:08 -0700

Hi,

This patch is the second qualification task for my GSoC project.
The patch isn't final yet.
I've had it reviewed by Thilo Borgmann and am now sending it here.


Currently, it decodes the floating-point data correctly and prints
the output to the console.
I compared the output float values with the ones in the original .wav
file and they are the same.

Right now, I'm having a problem writing the data to the output
file. The generated output file is almost half the size it
should be. I've checked the code and it should work, but it doesn't.

I'm also attaching the encoded file containing floating-point sample data
that I use for testing.

Umair

From cdaf469a9832812755924485e7f83a465652612a Mon Sep 17 00:00:00 2001
From: Umair Khan <omerj...@gmail.com>
Date: Sat, 16 Apr 2016 17:56:13 +0530
Subject: [PATCH] Implement floating point decoding in ALS

Signed-off-by: Umair Khan <omerj...@gmail.com>
---
 libavcodec/alsdec.c | 484 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 479 insertions(+), 5 deletions(-)

diff --git a/libavcodec/alsdec.c b/libavcodec/alsdec.c
index 1402b00..97f650c 100644
--- a/libavcodec/alsdec.c
+++ b/libavcodec/alsdec.c
@@ -37,6 +37,7 @@
 #include "internal.h"
 #include "libavutil/samplefmt.h"
 #include "libavutil/crc.h"
+#include "libavutil/intfloat.h"
 
 #include <stdint.h>
 
@@ -188,6 +189,14 @@ typedef struct ALSChannelData {
 } ALSChannelData;
 
 
+typedef struct MaskedLZDict {   ///< one entry of the Masked-LZ dictionary
+    int  string_code;           ///< code of this entry, CODE_UNSET after flush_dict()
+    int  parent_code;           ///< code that decode_string() continues with after this entry
+    int  char_code;             ///< byte that decode_string() stores at index match_len - 1
+    int  match_len;             ///< length of the string for this entry, 0 after flush_dict()
+} MaskedLZDict;
+
+
 typedef struct ALSDecContext {
     AVCodecContext *avctx;
     ALSSpecificConfig sconf;
@@ -225,6 +234,15 @@ typedef struct ALSDecContext {
     int32_t **raw_samples;          ///< decoded raw samples for each channel
     int32_t *raw_buffer;            ///< contains all decoded raw samples including carryover samples
     uint8_t *crc_buffer;            ///< buffer of byte order corrected samples used for CRC check
+    //float data
+    MaskedLZDict* dict;
+    float *acf;
+    int *last_acf_mantissa;
+    int *shift_value;
+    int *last_shift_value;
+    int **raw_mantissa;           ///< decoded mantissa bits of the difference signal
+    unsigned char *larray;
+    int **nbits;
 } ALSDecContext;
 
 
@@ -247,6 +265,45 @@ typedef struct ALSBlockData {
 } ALSBlockData;
 
 
+/**
+ * Masked LZ compression/decompression
+ */
+
+#define WORD_SIZE           8
+#define WORD_MASK           0xff
+#define CODE_UNSET          -1
+#define CODE_BIT_INIT       9
+#define CODE_BIT_MAX        15
+#define DIC_INDEX_INIT      512     // 2^9
+#define DIC_INDEX_MAX       32768L  // 2^15
+#define FLUSH_CODE          256
+#define FREEZE_CODE         257
+#define FIRST_CODE          258
+#define MAX_CODE            32767L
+#define TABLE_SIZE          35023L  // TABLE_SIZE must be a prime number
+#define MASK_CODE           0
+#define MAX_SEARCH          4       //(DIC_INDEX_MAX)
+
+#define IEEE754_EXP_BIASED  127     // IEEE754 defines exp to be biased by -127
+
+typedef union {                  // overlays a float with bit-fields sharing the same storage
+  float f;
+  struct {                       // NOTE(review): bit-field allocation order is implementation-defined;
+    unsigned int mantissa : 23;  //   this layout presumably assumes fields are packed LSB-first - confirm
+    unsigned int exponent : 8;   // read as the exponent field by multiply() and read_diff_float_data()
+    unsigned int sign : 1;
+  } parts;
+} CFloat;
+
+static int          dic_code_bit;           ///< number of bits read per code
+static int          current_dic_index_max;  ///< doubled whenever the bump code is read
+static unsigned int bump_code;              ///< code that widens the code word by one bit
+static unsigned int flush_code;             ///< set to FLUSH_CODE by init_dict()
+static int          next_code;              ///< dictionary index used for the next new entry
+static int          freeze_flag;            ///< set to 1 once FREEZE_CODE has been read
+//Masked LZ ends (the state above is static so that these generic names stay local to this file)
+
+
 static av_cold void dprint_specific_config(ALSDecContext *ctx)
 {
 #ifdef DEBUG
@@ -441,7 +498,6 @@ static int check_specific_config(ALSDecContext *ctx)
         }                                               \
     }
 
-    MISSING_ERR(sconf->floating,  "Floating point decoding",     AVERROR_PATCHWELCOME);
     MISSING_ERR(sconf->rlslms,    "Adaptive RLS-LMS prediction", AVERROR_PATCHWELCOME);
 
     return error;
@@ -867,9 +923,6 @@ static int read_var_block_data(ALSDecContext *ctx, ALSBlockData *bd)
                 *current_res++ = decode_rice(gb, s[sb]);
      }
 
-    if (!sconf->mc_coding || ctx->js_switch)
-        align_get_bits(gb);
-
     return 0;
 }
 
@@ -1006,6 +1059,9 @@ static int read_block(ALSDecContext *ctx, ALSBlockData *bd)
  */
 static int decode_block(ALSDecContext *ctx, ALSBlockData *bd)
 {
+    ALSSpecificConfig *sconf = &ctx->sconf;
+    GetBitContext *gb        = &ctx->gb;
+
     unsigned int smp;
     int ret = 0;
 
@@ -1024,6 +1080,9 @@ static int decode_block(ALSDecContext *ctx, ALSBlockData *bd)
         for (smp = 0; smp < bd->block_length; smp++)
             bd->raw_samples[smp] <<= *bd->shift_lsbs;
 
+    if (!sconf->mc_coding || ctx->js_switch)
+        align_get_bits(gb);
+
     return 0;
 }
 
@@ -1350,6 +1409,376 @@ static int revert_channel_correlation(ALSDecContext *ctx, ALSBlockData *bd,
 }
 
 
+// Reset the Masked-LZ code-width and dictionary-index bookkeeping to the initial values
+static void init_dict(void) {
+    freeze_flag           = 0;
+    next_code             = FIRST_CODE;
+    dic_code_bit          = CODE_BIT_INIT;
+    current_dic_index_max = DIC_INDEX_INIT;
+    bump_code             = DIC_INDEX_INIT - 1;
+    flush_code            = FLUSH_CODE;
+}
+
+
+/**
+ * Expand one dictionary code into the byte string it stands for.
+ *
+ * The parent chain of string_code is followed towards the head of the string;
+ * every entry stores its character at buff[match_len - 1].
+ *
+ * @param buff            destination for the decoded bytes
+ * @param string_code     code to expand; CODE_UNSET yields no output
+ * @param first_char_code receives the literal at the head of the chain, or
+ *                        CODE_UNSET if the chain did not end in a literal
+ * @param bufsize         number of bytes that may be written to buff
+ * @param dict            dictionary to look the codes up in
+ * @return number of bytes stored in buff
+ */
+static int decode_string(unsigned char *buff, int string_code, int *first_char_code, unsigned long bufsize, MaskedLZDict* dict) {
+    unsigned long count = 0, offset;
+    int current_code = string_code, parent_code;
+
+    *first_char_code = CODE_UNSET;
+    while (count < bufsize) {
+        // CODE_UNSET and any other value that cannot index the dictionary end the walk
+        if (current_code < 0 || current_code > DIC_INDEX_MAX - 1)
+            return count;
+        if (current_code < FIRST_CODE) {
+            *first_char_code = current_code;
+            buff[0] = current_code;
+            return count + 1;
+        }
+
+        // match_len == 0 (unused entry) wraps offset around, so this one test also
+        // rejects unset entries besides strings that do not fit into buff
+        offset = dict[current_code].match_len - 1;
+        if (offset >= bufsize)
+            return count;
+        buff[offset] = dict[current_code].char_code;
+        count++;
+
+        current_code = dict[current_code].parent_code;
+        if (current_code > FIRST_CODE && current_code < DIC_INDEX_MAX) {
+            // do not continue into an entry whose own parent link is invalid
+            parent_code = dict[current_code].parent_code;
+            if (parent_code < 0 || parent_code > DIC_INDEX_MAX - 1)
+                return count;
+        }
+    }
+    return count;
+}
+
+
+/**
+ * Empty the dictionary and restart with the initial code width.
+ *
+ * Code space afterwards: 0..255 literals, 256 FLUSH_CODE, 257 FREEZE_CODE,
+ * 258..max-2 dictionary entries (first new entry is 258), max-1 bump code
+ * (511 for the initial code width).
+ */
+static void flush_dict(MaskedLZDict* dict) {
+    MaskedLZDict *entry;
+
+    for (entry = dict; entry < dict + TABLE_SIZE; entry++) {
+        entry->match_len   = 0;
+        entry->parent_code = CODE_UNSET;
+        entry->string_code = CODE_UNSET;
+    }
+
+    freeze_flag           = 0;
+    next_code             = FIRST_CODE;
+    dic_code_bit          = CODE_BIT_INIT;
+    current_dic_index_max = DIC_INDEX_INIT;
+    bump_code             = DIC_INDEX_INIT - 1;
+}
+
+
+// Record entry string_code: the string of parent_code extended by char_code.
+static void set_new_entry_dict(int string_code, int parent_code, int char_code, MaskedLZDict* dict) {
+    MaskedLZDict *entry = &dict[string_code];
+    int parent_len      = 1;    // parents below FIRST_CODE are counted with length 1
+    if (parent_code >= FIRST_CODE)
+        parent_len = dict[parent_code].match_len;
+
+    entry->parent_code = parent_code;
+    entry->string_code = string_code;
+    entry->char_code   = char_code;
+    entry->match_len   = parent_len + 1;
+}
+
+
+/**
+ * Decode Masked-LZ compressed bytes from the bitstream.
+ * @param ctx  decoder context providing the bit reader and the dictionary
+ * @param size number of bytes to produce
+ * @param buff output buffer, must hold at least size bytes
+ * @return number of bytes written to buff, short of size if decoding stopped early
+ */
+static int masked_lz_decompression(ALSDecContext *ctx, int size, unsigned char *buff) {
+    GetBitContext* gb  = &ctx->gb;
+    MaskedLZDict *dict = ctx->dict;
+    unsigned long output_chars = 0;
+    int string_code, last_string_code = -1, char_code = -1;
+    while (output_chars < size) {
+        string_code = get_bits(gb, dic_code_bit);
+        switch (string_code) {
+            case FLUSH_CODE:
+            case MAX_CODE:
+                flush_dict(dict);
+                char_code = -1;
+                last_string_code = -1;
+                break;
+            case FREEZE_CODE:
+                freeze_flag = 1;
+                break;
+            default:
+                if (string_code > current_dic_index_max) {
+                    av_log(ctx->avctx, AV_LOG_ERROR, "string code %d more than the max value.\n", string_code);
+                    return output_chars;
+                }
+                if (string_code == (int) bump_code) {
+                    ++dic_code_bit;
+                    current_dic_index_max *= 2;
+                    bump_code = current_dic_index_max - 1;
+                    break;
+                }
+                if (string_code >= next_code) {
+                    // code not in the dictionary yet: emit the previous string, then its first character again
+                    output_chars += decode_string(&buff[output_chars], last_string_code, &char_code, size - output_chars, dict);
+                    output_chars += decode_string(&buff[output_chars], char_code, &char_code, size - output_chars, dict);
+                } else {
+                    output_chars += decode_string(&buff[output_chars], string_code, &char_code, size - output_chars, dict);
+                    // nothing is added while frozen or right after a flush, but the code still becomes the next parent
+                    if (freeze_flag || last_string_code == -1) {
+                        last_string_code = string_code;
+                        break;
+                    }
+                }
+                // the dictionary holds TABLE_SIZE entries, never write past its end
+                if (next_code >= TABLE_SIZE)
+                    return output_chars;
+                set_new_entry_dict(next_code, last_string_code, char_code, dict);
+                ++next_code;
+                last_string_code = string_code;
+                break;
+        }
+    }
+    return output_chars;
+}
+
+
+/**
+ * Multiply two values given as IEEE 754 single precision bit patterns.
+ *
+ * The 24-bit significands (implicit leading one restored) are multiplied in a
+ * 64-bit register and rounded to nearest, ties to even. The exponent fields are
+ * added without removing a bias, so one operand has to carry an exponent field
+ * of 0, like the common factor set up in read_diff_float_data(). Zero and
+ * denormal inputs are not handled.
+ */
+static float multiply(float a, float b) {
+    uint64_t mantissa_temp;
+    uint64_t mask_64 = (uint64_t)0x1 << 47;
+    int bit_count = 48, cutoff_bit_count;
+    unsigned char last_2_bits;
+    unsigned int mantissa;
+    CFloat f1, f2;
+    uint32_t return_val;
+
+    f1.f = a;
+    f2.f = b;
+    // The result is negative when exactly one operand is negative
+    return_val = (uint32_t)(f1.parts.sign ^ f2.parts.sign) << 31;
+
+    // Multiply the significands; without the implicit leading one only the fractions would be multiplied
+    mantissa_temp = (uint64_t)(f1.parts.mantissa | 0x800000u) * (uint64_t)(f2.parts.mantissa | 0x800000u);
+
+    // Count the valid bits: the product of two 24-bit significands has 47 or 48 of them
+    for (; !(mantissa_temp & mask_64); bit_count--, mask_64 >>= 1);
+
+    // Round off the bits below the 24 that are kept
+    cutoff_bit_count = bit_count - 24;
+    last_2_bits = (unsigned char)(((unsigned int)mantissa_temp >> (cutoff_bit_count - 1)) & 0x3);
+    if ((last_2_bits == 0x3) || ((last_2_bits == 0x1) && ((unsigned int)mantissa_temp & ((0x1UL << (cutoff_bit_count - 1)) - 1)))) {
+        mantissa_temp += (uint64_t)0x1 << cutoff_bit_count;    // need to round up
+    }
+
+    mantissa = (unsigned int)(mantissa_temp >> cutoff_bit_count);
+    if (mantissa & 0x01000000ul) {      // rounding carried into bit 24: one more shift
+        bit_count++;
+        mantissa >>= 1;
+    }
+
+    return_val |= (uint32_t)(f1.parts.exponent + f2.parts.exponent + bit_count - 47) << 23;
+    return_val |= mantissa & 0x007fffffu;   // drop the implicit one, bit 23 belongs to the exponent
+    return av_int2float(return_val);
+}
+
+
+/**
+ * Read the float difference data of a frame and replace the integer samples
+ * in raw_samples by 32-bit IEEE 754 single precision bit patterns.
+ *
+ * For every channel, part A supplies a complete 32-bit value for each sample
+ * whose integer value is 0 and part B supplies mantissa difference bits for
+ * all other samples. Either part may be Masked-LZ compressed.
+ *
+ * @param ctx      decoder context
+ * @param ra_frame nonzero resets the per-channel history and the dictionary
+ * @return 0 on success, AVERROR_INVALIDDATA if compressed data ends early
+ */
+static int read_diff_float_data(ALSDecContext *ctx, unsigned int ra_frame) {
+    AVCodecContext *avctx = ctx->avctx;
+    GetBitContext *gb = &ctx->gb;
+    float *acf = ctx->acf;
+    int *shift_value = ctx->shift_value;
+    int *last_shift_value = ctx->last_shift_value;
+    int *last_acf_mantissa = ctx->last_acf_mantissa;
+    int **raw_mantissa = ctx->raw_mantissa;
+    int **nbits = ctx->nbits;
+    unsigned char *larray = ctx->larray;
+    int frame_length = ctx->cur_frame_length;
+    float scale = (float) (0x1u << 23);
+    uint32_t tmp_32, num_bytes_diff_float, e, sign;
+    unsigned int partA_flag, highest_byte, shift_amp, mantissa;
+    int use_acf, nchars, i, c;
+    long k, nbits_aligned;
+    unsigned long acc, j;
+    CFloat temp_pcm;
+
+    num_bytes_diff_float = get_bits_long(gb, 32); //num_bytes_diff_float
+    av_log(avctx, AV_LOG_DEBUG, "read_diff_float_data() length = %"PRIu32"\n", num_bytes_diff_float);
+    use_acf = get_bits1(gb);
+    if (ra_frame) {
+        for (c = 0; c < avctx->channels; ++c) {
+            last_acf_mantissa[c] = 0;
+            last_shift_value[c] = 0;
+        }
+        flush_dict(ctx->dict);
+    }
+    for (c = 0; c < avctx->channels; ++c) {
+        if (use_acf) {
+            if (get_bits1(gb) /*acf_flag*/) {
+                tmp_32 = get_bits(gb, 23);
+                last_acf_mantissa[c] = tmp_32;
+            } else {
+                tmp_32 = last_acf_mantissa[c];
+            }
+            acf[c] = av_int2float(tmp_32);
+        } else {
+            acf[c] = 1.0f;
+        }
+        highest_byte = get_bits(gb, 2);
+        shift_amp = get_bits1(gb);
+        partA_flag = get_bits1(gb);
+        if (shift_amp) {
+            shift_value[c] = get_bits(gb, 8);
+            last_shift_value[c] = shift_value[c];
+        } else {
+            shift_value[c] = last_shift_value[c];
+        }
+
+        // a part that is absent from the stream must contribute 0, not stale data
+        for (i = 0; i < frame_length; ++i)
+            raw_mantissa[c][i] = 0;
+
+        // part A is kept apart from raw_samples so that the loops below still
+        // see which samples had the integer value 0
+        if (partA_flag) {
+            if (!get_bits1(gb)/*compressed_flag*/) { //uncompressed
+                for (i = 0; i < frame_length; ++i) {
+                    if (ctx->raw_samples[c][i] == 0)
+                        raw_mantissa[c][i] = get_bits_long(gb, 32);
+                }
+            } else { //compressed
+                nchars = 0;
+                for (i = 0; i < frame_length; ++i) {
+                    if (ctx->raw_samples[c][i] == 0)
+                        nchars += 4;
+                }
+                if (masked_lz_decompression(ctx, nchars, larray) != nchars)
+                    return AVERROR_INVALIDDATA;
+                j = 0;  // four big-endian bytes per zero sample, in sample order
+                for (i = 0; i < frame_length; ++i) {
+                    if (ctx->raw_samples[c][i] == 0) {
+                        raw_mantissa[c][i] = (uint32_t)larray[j] << 24 | larray[j+1] << 16 | larray[j+2] << 8 | larray[j+3];
+                        j += 4;
+                    }
+                }
+            }
+        }
+
+        //decode part B
+        if (highest_byte) {
+            for (i = 0; i < frame_length; ++i) {
+                if (ctx->raw_samples[c][i] != 0) {
+                    //The following logic is taken from Table 14.45 and 14.46 from the ISO spec
+                    if (acf[c] == 1.0f) {
+                        nbits[c][i] = 23;
+                    } else {
+                        nbits[c][i] = 23 - av_log2(abs(ctx->raw_samples[c][i]));
+                    }
+                    nbits[c][i] = FFMIN(nbits[c][i], highest_byte*8);
+                }
+            }
+            if (!get_bits1(gb)/*compressed_flag*/) { //uncompressed
+                for (i = 0; i < frame_length; ++i) {
+                    if (ctx->raw_samples[c][i] != 0)
+                        raw_mantissa[c][i] = get_bits(gb, nbits[c][i]);
+                }
+                av_log(avctx, AV_LOG_DEBUG, "partB uncompressed\n");
+            } else { //compressed
+                nchars = 0;
+                for (i = 0; i < frame_length; ++i) {
+                    if (ctx->raw_samples[c][i])
+                        nchars += (nbits[c][i] + 7) / 8;
+                }
+                if (masked_lz_decompression(ctx, nchars, larray) != nchars)
+                    return AVERROR_INVALIDDATA;
+                j = 0;
+                for (i = 0; i < frame_length; ++i) {
+                    if (ctx->raw_samples[c][i]) {
+                        nbits_aligned = 8 * ((nbits[c][i] + 7) / 8);
+                        acc = 0;
+                        for (k = 0; k < nbits_aligned/8; ++k) {
+                            acc = ( acc << 8 ) + larray[j++];
+                        }
+                        acc >>= ( nbits_aligned - nbits[c][i] );
+                        raw_mantissa[c][i] = acc;
+                    }
+                }
+            }
+        }
+
+        for (i = 0; i < frame_length; ++i) {
+            if (ctx->raw_samples[c][i] == 0) {
+                // the part A value (0 without part A) already is the final bit pattern
+                ctx->raw_samples[c][i] = raw_mantissa[c][i];
+                continue;
+            }
+            if (acf[c] == 1.0f) {
+                temp_pcm.f = (float) (ctx->raw_samples[c][i] / scale);
+            } else {
+                temp_pcm.f = multiply(acf[c], (float) (ctx->raw_samples[c][i] / scale));
+            }
+            sign = temp_pcm.parts.sign;
+            e    = temp_pcm.parts.exponent;
+            mantissa = (temp_pcm.parts.mantissa | 0x800000) + raw_mantissa[c][i];
+            while( mantissa >= 0x1000000 ) {
+                e++;
+                mantissa >>= 1;
+            }
+            if ( mantissa ) e += (shift_value[c] - 127);
+            tmp_32 = (sign << 31) | (e << 23) | (mantissa & 0x007fffffUL);
+            ctx->raw_samples[c][i] = tmp_32;
+            av_log(avctx, AV_LOG_TRACE, "final output = %f\n", av_int2float(ctx->raw_samples[c][i]));
+        }
+        align_get_bits(gb);
+    }
+    return 0;
+}
+
+
 /** Read the frame data.
  */
 static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
@@ -1491,7 +1920,9 @@ static int read_frame_data(ALSDecContext *ctx, unsigned int ra_frame)
                     sizeof(*ctx->raw_samples[c]) * sconf->max_order);
     }
 
-    // TODO: read_diff_float_data
+    if (sconf->floating) {
+        read_diff_float_data(ctx, ra_frame);
+    }
 
     if (get_bits_left(gb) < 0) {
         av_log(ctx->avctx, AV_LOG_ERROR, "Overread %d\n", -get_bits_left(gb));
@@ -1661,6 +2092,14 @@ static av_cold int decode_end(AVCodecContext *avctx)
     av_freep(&ctx->chan_data_buffer);
     av_freep(&ctx->reverted_channels);
     av_freep(&ctx->crc_buffer);
+    av_freep(&ctx->dict);
+    av_freep(&ctx->acf);
+    av_freep(&ctx->last_acf_mantissa);
+    av_freep(&ctx->shift_value);
+    av_freep(&ctx->last_shift_value);
+    av_freep(&ctx->raw_mantissa);
+    av_freep(&ctx->larray);
+    av_freep(&ctx->nbits);
 
     return 0;
 }
@@ -1711,6 +2150,12 @@ static av_cold int decode_init(AVCodecContext *avctx)
         }
     }
 
+    if (sconf->floating) {
+        av_log(avctx, AV_LOG_ERROR, "floating is enabled.\n");
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "floating is NOT enabled.\n");
+    }
+
     // set maximum Rice parameter for progressive decoding based on resolution
     // This is not specified in 14496-3 but actually done by the reference
     // codec RM22 revision 2.
@@ -1797,6 +2242,35 @@ static av_cold int decode_init(AVCodecContext *avctx)
     ctx->raw_buffer       = av_mallocz_array(avctx->channels * channel_size, sizeof(*ctx->raw_buffer));
     ctx->raw_samples      = av_malloc_array(avctx->channels, sizeof(*ctx->raw_samples));
 
+    if (sconf->floating) {
+        ctx->dict = av_malloc_array(TABLE_SIZE, sizeof(*ctx->dict));
+        ctx->acf =  av_malloc_array(avctx->channels, sizeof(*ctx->acf));
+        ctx->shift_value = av_malloc_array(avctx->channels, sizeof(*ctx->shift_value));
+        ctx->last_shift_value = av_malloc_array(avctx->channels, sizeof(*ctx->last_shift_value));
+        ctx->last_acf_mantissa = av_malloc_array(avctx->channels, sizeof(*ctx->last_acf_mantissa));
+
+        ctx->raw_mantissa = av_malloc_array(avctx->channels, sizeof(*ctx->raw_mantissa));
+        for (int c = 0; c < avctx->channels; ++c) {
+            ctx->raw_mantissa[c] = av_malloc_array(ctx->cur_frame_length, sizeof(**ctx->raw_mantissa));
+        }
+
+        ctx->larray = av_malloc_array(ctx->cur_frame_length * 4, sizeof(*ctx->larray));
+
+        ctx->nbits = av_malloc_array(avctx->channels, sizeof(*ctx->nbits));
+        for (int c = 0; c < avctx->channels; ++c) {
+            ctx->nbits[c] = av_malloc_array(ctx->cur_frame_length, sizeof(**ctx->nbits));
+        }
+
+        init_dict();
+
+        if (!ctx->dict || !ctx->acf || !ctx->shift_value || !ctx->last_shift_value
+            || !ctx->last_acf_mantissa || !ctx->raw_mantissa) {
+            av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
+            ret = AVERROR(ENOMEM);
+            goto fail;
+        }
+    }
+
     // allocate previous raw sample buffer
     if (!ctx->prev_raw_samples || !ctx->raw_buffer|| !ctx->raw_samples) {
         av_log(avctx, AV_LOG_ERROR, "Allocating buffer memory failed.\n");
-- 
2.5.0

float.mp4
Description: video/mp4

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

[FFmpeg-devel] [PATCH][GSoC] Implement floating point decoding in ALS

Reply via email to