From: Janne Grunau <[email protected]>
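Get rid of the per-call av_malloc()/av_free() in celt_interleave_hadamard()
by using a statically sized scratch buffer in CeltContext. The product of N
and B is at most MAX(celt_freq_range) * 8, i.e. 22 * 8 floats.
Approximately 5% faster CELT decoding.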
---
feel free to squash it
libavcodec/opus_celt.c | 45 ++++++++++++++++++++++++++++++---------------
1 file changed, 30 insertions(+), 15 deletions(-)
diff --git a/libavcodec/opus_celt.c b/libavcodec/opus_celt.c
index 4d15d0b..8f144de 100644
--- a/libavcodec/opus_celt.c
+++ b/libavcodec/opus_celt.c
@@ -97,6 +97,7 @@ struct CeltContext {
int tf_change [CELT_MAX_BANDS];
DECLARE_ALIGNED(32, float, coeffs)[2][CELT_MAX_FRAME_SIZE];
+ DECLARE_ALIGNED(32, float, scratch)[22 * 8]; // MAX(celt_freq_range) * 1<<CELT_MAX_LOG_BLOCKS
};
static const uint16_t celt_model_tapset[] = { 4, 2, 3, 4 };
@@ -1119,34 +1120,46 @@ static inline void celt_stereo_merge(float *X, float *Y, float mid, int N)
}
}
-static void celt_interleave_hadamard(float *X, int N0, int stride, int hadamard, int interleave)
+static void celt_interleave_hadamard(float *tmp, float *X, int N0,
+ int stride, int hadamard)
{
- // TODO: Study N and B to find out the maximum size to allocate, and do so statically
- // TODO: Also, combine deinterleave and interleave together into the same function
int i, j;
int N = N0*stride;
- float *tmp = av_malloc(N * sizeof(float));
if (hadamard) {
const uint8_t *ordery = celt_hadamard_ordery + stride - 2;
for (i = 0; i < stride; i++)
for (j = 0; j < N0; j++)
- if (interleave)
- tmp[j*stride+i] = X[ordery[i]*N0+j];
- else
- tmp[ordery[i]*N0+j] = X[j*stride+i];
+ tmp[j*stride+i] = X[ordery[i]*N0+j];
} else {
for (i = 0; i < stride; i++)
for (j = 0; j < N0; j++)
- if (interleave)
- tmp[j*stride+i] = X[i*N0+j];
- else
- tmp[i*N0+j] = X[j*stride+i];
+ tmp[j*stride+i] = X[i*N0+j];
+ }
+
+ for (i = 0; i < N; i++)
+ X[i] = tmp[i];
+}
+
+static void celt_deinterleave_hadamard(float *tmp, float *X, int N0,
+ int stride, int hadamard)
+{
+ int i, j;
+ int N = N0*stride;
+
+ if (hadamard) {
+ const uint8_t *ordery = celt_hadamard_ordery + stride - 2;
+ for (i = 0; i < stride; i++)
+ for (j = 0; j < N0; j++)
+ tmp[ordery[i]*N0+j] = X[j*stride+i];
+ } else {
+ for (i = 0; i < stride; i++)
+ for (j = 0; j < N0; j++)
+ tmp[i*N0+j] = X[j*stride+i];
}
for (i = 0; i < N; i++)
X[i] = tmp[i];
- av_free(tmp);
}
static void celt_haar1(float *X, int N0, int stride)
@@ -1366,7 +1379,8 @@ static unsigned int celt_decode_band(CeltContext *s, OpusRangeCoder *rc,
/* Reorganize the samples in time order instead of frequency order */
if (B0 > 1 && lowband)
- celt_interleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longblocks, 0);
+ celt_deinterleave_hadamard(s->scratch, lowband, N_B>>recombine,
+ B0<<recombine, longblocks);
}
/* If we need 1.5 more bit than we can produce, split the band in two. */
@@ -1604,7 +1618,8 @@ static unsigned int celt_decode_band(CeltContext *s, OpusRangeCoder *rc,
/* Undo the sample reorganization going from time order to frequency order */
if (B0 > 1)
- celt_interleave_hadamard(X, N_B>>recombine, B0<<recombine, longblocks, 1);
+ celt_interleave_hadamard(s->scratch, X, N_B>>recombine,
+ B0<<recombine, longblocks);
/* Undo time-freq changes that we did earlier */
N_B = N_B0;
--
1.9.2
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel