From: Janne Grunau <[email protected]>

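Get rid of the malloc in celt_interleave_hadamard() by using a scratch
buffer in CeltContext instead. The product of N and B is at most
MAX(celt_freq_range) * 8, so 22 * 8 floats are sufficient. The
interleave flag is replaced by a separate celt_deinterleave_hadamard()
function.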

Approximately 5% faster celt decoding.
---
feel free to squash it

 libavcodec/opus_celt.c | 45 ++++++++++++++++++++++++++++++---------------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/libavcodec/opus_celt.c b/libavcodec/opus_celt.c
index 4d15d0b..8f144de 100644
--- a/libavcodec/opus_celt.c
+++ b/libavcodec/opus_celt.c
@@ -97,6 +97,7 @@ struct CeltContext {
     int tf_change    [CELT_MAX_BANDS];
 
     DECLARE_ALIGNED(32, float, coeffs)[2][CELT_MAX_FRAME_SIZE];
+    DECLARE_ALIGNED(32, float, scratch)[22 * 8]; // MAX(celt_freq_range) * 1<<CELT_MAX_LOG_BLOCKS
 };
 
 static const uint16_t celt_model_tapset[] = { 4, 2, 3, 4 };
@@ -1119,34 +1120,46 @@ static inline void celt_stereo_merge(float *X, float *Y, float mid, int N)
     }
 }
 
-static void celt_interleave_hadamard(float *X, int N0, int stride, int hadamard, int interleave)
+static void celt_interleave_hadamard(float *tmp, float *X, int N0,
+                                     int stride, int hadamard)
 {
-    // TODO: Study N and B to find out the maximum size to allocate, and do so statically
-    // TODO: Also, combine deinterleave and interleave together into the same function
     int i, j;
     int N = N0*stride;
-    float *tmp = av_malloc(N * sizeof(float));
 
     if (hadamard) {
         const uint8_t *ordery = celt_hadamard_ordery + stride - 2;
         for (i = 0; i < stride; i++)
             for (j = 0; j < N0; j++)
-                if (interleave)
-                    tmp[j*stride+i] = X[ordery[i]*N0+j];
-                else
-                    tmp[ordery[i]*N0+j] = X[j*stride+i];
+                tmp[j*stride+i] = X[ordery[i]*N0+j];
     } else {
         for (i = 0; i < stride; i++)
             for (j = 0; j < N0; j++)
-                if (interleave)
-                    tmp[j*stride+i] = X[i*N0+j];
-                else
-                    tmp[i*N0+j] = X[j*stride+i];
+                tmp[j*stride+i] = X[i*N0+j];
+    }
+
+    for (i = 0; i < N; i++)
+        X[i] = tmp[i];
+}
+
+static void celt_deinterleave_hadamard(float *tmp, float *X, int N0,
+                                       int stride, int hadamard)
+{
+    int i, j;
+    int N = N0*stride;
+
+    if (hadamard) {
+        const uint8_t *ordery = celt_hadamard_ordery + stride - 2;
+        for (i = 0; i < stride; i++)
+            for (j = 0; j < N0; j++)
+                tmp[ordery[i]*N0+j] = X[j*stride+i];
+    } else {
+        for (i = 0; i < stride; i++)
+            for (j = 0; j < N0; j++)
+                tmp[i*N0+j] = X[j*stride+i];
     }
 
     for (i = 0; i < N; i++)
         X[i] = tmp[i];
-    av_free(tmp);
 }
 
 static void celt_haar1(float *X, int N0, int stride)
@@ -1366,7 +1379,8 @@ static unsigned int celt_decode_band(CeltContext *s, OpusRangeCoder *rc,
 
         /* Reorganize the samples in time order instead of frequency order */
         if (B0 > 1 && lowband)
-            celt_interleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longblocks, 0);
+            celt_deinterleave_hadamard(s->scratch, lowband, N_B>>recombine,
+                                       B0<<recombine, longblocks);
     }
 
     /* If we need 1.5 more bit than we can produce, split the band in two. */
@@ -1604,7 +1618,8 @@ static unsigned int celt_decode_band(CeltContext *s, OpusRangeCoder *rc,
 
         /* Undo the sample reorganization going from time order to frequency order */
         if (B0 > 1)
-            celt_interleave_hadamard(X, N_B>>recombine, B0<<recombine, longblocks, 1);
+            celt_interleave_hadamard(s->scratch, X, N_B>>recombine,
+                                     B0<<recombine, longblocks);
 
         /* Undo time-freq changes that we did earlier */
         N_B = N_B0;
-- 
1.9.2

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to