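Mantissas are first tallied per block and per bap value; all the actual bit
counting is then done as a final step in compute_mantissa_size(). The ARM asm
version of compute_mantissa_size(), written for the old interface, is removed.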
x86 benchmarks:
50% faster in function count_mantissa_bits()
16% faster in function bit_alloc()
---
 libavcodec/ac3dsp.c              |   33 ++++++++--------
 libavcodec/ac3dsp.h              |    4 +-
 libavcodec/ac3enc.c              |   78 +++++++++++++++++++++-----------------
 libavcodec/arm/Makefile          |    1 -
 libavcodec/arm/ac3dsp_arm.S      |   52 -------------------------
 libavcodec/arm/ac3dsp_init_arm.c |    2 -
 6 files changed, 63 insertions(+), 107 deletions(-)
 delete mode 100644 libavcodec/arm/ac3dsp_arm.S

diff --git a/libavcodec/ac3dsp.c b/libavcodec/ac3dsp.c
index e3ca37e..374dd6e 100644
--- a/libavcodec/ac3dsp.c
+++ b/libavcodec/ac3dsp.c
@@ -128,24 +128,25 @@ static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
     } while (end > ff_ac3_band_start_tab[band++]);
 }
 
-static int ac3_compute_mantissa_size_c(int mant_cnt[5], uint8_t *bap,
-                                       int nb_coefs)
-{
-    int bits, b, i;
+DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
+    0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
+};
 
-    bits = 0;
-    for (i = 0; i < nb_coefs; i++) {
-        b = bap[i];
-        if (b <= 4) {
-            // bap=1 to bap=4 will be counted in compute_mantissa_size_final
-            mant_cnt[b]++;
-        } else if (b <= 13) {
-            // bap=5 to bap=13 use (bap-1) bits
-            bits += b - 1;
-        } else {
-            // bap=14 uses 14 bits and bap=15 uses 16 bits
-            bits += (b == 14) ? 14 : 16;
-        }
+static int ac3_compute_mantissa_size_c(uint16_t mant_cnt[6][16])
+{
+    int blk, bap;
+    int bits = 0;
+
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        // bap=1 : 3 mantissas in 5 bits
+        bits += (mant_cnt[blk][1] / 3) * 5;
+        // bap=2 : 3 mantissas in 7 bits
+        // bap=4 : 2 mantissas in 7 bits
+        bits += ((mant_cnt[blk][2] / 3) + (mant_cnt[blk][4] >> 1)) * 7;
+        // bap=3 : each mantissa is 3 bits
+        bits += mant_cnt[blk][3] * 3;
+        for (bap = 5; bap < 16; bap++)
+            bits += mant_cnt[blk][bap] * ff_ac3_bap_bits[bap];
     }
     return bits;
 }
diff --git a/libavcodec/ac3dsp.h b/libavcodec/ac3dsp.h
index b750767..228b105 100644
--- a/libavcodec/ac3dsp.h
+++ b/libavcodec/ac3dsp.h
@@ -24,6 +24,8 @@
 
 #include <stdint.h>
 
+extern const uint16_t ff_ac3_bap_bits[16];
+
 typedef struct AC3DSPContext {
     /**
      * Set each encoded exponent in a block to the minimum of itself and the
@@ -104,7 +106,7 @@ typedef struct AC3DSPContext {
     /**
      * Calculate the number of bits needed to encode a set of mantissas.
      */
-    int (*compute_mantissa_size)(int mant_cnt[5], uint8_t *bap, int nb_coefs);
+    int (*compute_mantissa_size)(uint16_t mant_cnt[6][16]);
 
     void (*extract_exponents)(uint8_t *exp, int32_t *coef, int nb_coefs);
 } AC3DSPContext;
diff --git a/libavcodec/ac3enc.c b/libavcodec/ac3enc.c
index bb3490a..fbaa1c8 100644
--- a/libavcodec/ac3enc.c
+++ b/libavcodec/ac3enc.c
@@ -945,22 +945,6 @@ static void count_frame_bits(AC3EncodeContext *s)
 
 
 /**
- * Finalize the mantissa bit count by adding in the grouped mantissas.
- */
-static int compute_mantissa_size_final(int mant_cnt[5])
-{
-    // bap=1 : 3 mantissas in 5 bits
-    int bits = (mant_cnt[1] / 3) * 5;
-    // bap=2 : 3 mantissas in 7 bits
-    // bap=4 : 2 mantissas in 7 bits
-    bits += ((mant_cnt[2] / 3) + (mant_cnt[4] >> 1)) * 7;
-    // bap=3 : each mantissa is 3 bits
-    bits += mant_cnt[3] * 3;
-    return bits;
-}
-
-
-/**
  * Calculate masking curve based on the final exponents.
  * Also calculate the power spectral densities to use in future calculations.
  */
@@ -1008,30 +992,54 @@ static void reset_block_bap(AC3EncodeContext *s)
 
 
 /**
- * Count the number of mantissa bits in the frame based on the bap values.
+ * Initialize mantissa counts.
+ * The grouped bap counts (1, 2, 4) start at group size - 1 so that the integer
+ * division in compute_mantissa_size rounds up to the next whole group.
  */
-static int count_mantissa_bits(AC3EncodeContext *s)
+static void count_mantissa_bits_init(uint16_t mant_cnt[AC3_MAX_BLOCKS][16])
 {
-    int blk, ch;
-    int mantissa_bits;
-    int mant_cnt[5];
+    int blk;
 
-    mantissa_bits = 0;
     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
-        // initialize grouped mantissa counts. these are set so that they are
-        // padded to the next whole group size when bits are counted in
-        // compute_mantissa_size_final
-        mant_cnt[0] = mant_cnt[3] = 0;
-        mant_cnt[1] = mant_cnt[2] = 2;
-        mant_cnt[4] = 1;
-        for (ch = 0; ch < s->channels; ch++) {
-            mantissa_bits += s->ac3dsp.compute_mantissa_size(mant_cnt,
-                                                             s->blocks[blk].exp_ref_block[ch]->bap[ch],
-                                                             s->nb_coefs[ch]);
-        }
-        mantissa_bits += compute_mantissa_size_final(mant_cnt);
+        memset(mant_cnt[blk], 0, sizeof(mant_cnt[blk]));
+        mant_cnt[blk][1] = mant_cnt[blk][2] = 2;
+        mant_cnt[blk][4] = 1;
+    }
+}
+
+
+/**
+ * Update mantissa bit counts for all blocks in 1 channel in a given bandwidth
+ * range.
+ */
+static void count_mantissa_bits_update_ch(AC3EncodeContext *s, int ch,
+                                          uint16_t mant_cnt[AC3_MAX_BLOCKS][16],
+                                          int start, int end)
+{
+    int blk, i;
+
+    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
+        uint8_t *bap = s->blocks[blk].exp_ref_block[ch]->bap[ch];
+        for (i = start; i < end; i++)
+            mant_cnt[blk][bap[i]]++;
     }
-    return mantissa_bits;
+}
+
+
+/**
+ * Count the number of mantissa bits in the frame based on the bap values.
+ */
+static int count_mantissa_bits(AC3EncodeContext *s)
+{
+    int ch;
+    LOCAL_ALIGNED_16(uint16_t, mant_cnt,[AC3_MAX_BLOCKS][16]);
+
+    count_mantissa_bits_init(mant_cnt);
+
+    for (ch = 0; ch < s->channels; ch++)
+        count_mantissa_bits_update_ch(s, ch, mant_cnt, 0, s->nb_coefs[ch]);
+
+    return s->ac3dsp.compute_mantissa_size(mant_cnt);
 }
 
 
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index a5abfdd..a5a5dfa 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -1,5 +1,4 @@
 OBJS-$(CONFIG_AC3DSP)                  += arm/ac3dsp_init_arm.o         \
-                                          arm/ac3dsp_arm.o
 
 OBJS-$(CONFIG_DCA_DECODER)             += arm/dcadsp_init_arm.o         \
 
diff --git a/libavcodec/arm/ac3dsp_arm.S b/libavcodec/arm/ac3dsp_arm.S
deleted file mode 100644
index d7d498e..0000000
--- a/libavcodec/arm/ac3dsp_arm.S
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2011 Mans Rullgard <[email protected]>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "asm.S"
-
-function ff_ac3_compute_mantissa_size_arm, export=1
-        push            {r4-r8,lr}
-        ldm             r0,  {r4-r8}
-        mov             r3,  r0
-        mov             r0,  #0
-1:
-        ldrb            lr,  [r1], #1
-        subs            r2,  r2,  #1
-        blt             2f
-        cmp             lr,  #4
-        bgt             3f
-        subs            lr,  lr,  #1
-        addlt           r4,  r4,  #1
-        addeq           r5,  r5,  #1
-        ble             1b
-        subs            lr,  lr,  #2
-        addlt           r6,  r6,  #1
-        addeq           r7,  r7,  #1
-        addgt           r8,  r8,  #1
-        b               1b
-3:
-        cmp             lr,  #14
-        sublt           lr,  lr,  #1
-        addgt           r0,  r0,  #16
-        addle           r0,  r0,  lr
-        b               1b
-2:
-        stm             r3,  {r4-r8}
-        pop             {r4-r8,pc}
-endfunc
diff --git a/libavcodec/arm/ac3dsp_init_arm.c b/libavcodec/arm/ac3dsp_init_arm.c
index fd78e1e..4414dc8 100644
--- a/libavcodec/arm/ac3dsp_init_arm.c
+++ b/libavcodec/arm/ac3dsp_init_arm.c
@@ -39,8 +39,6 @@ int ff_ac3_compute_mantissa_size_arm(int cnt[5], uint8_t *bap, int nb_coefs);
 
 av_cold void ff_ac3dsp_init_arm(AC3DSPContext *c, int bit_exact)
 {
-    c->compute_mantissa_size     = ff_ac3_compute_mantissa_size_arm;
-
     if (HAVE_ARMV6) {
         c->bit_alloc_calc_bap    = ff_ac3_bit_alloc_calc_bap_armv6;
     }
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to