[FFmpeg-cvslog] aacdec: restore arm32 dequantization optimizations

2024-05-13 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat May 11 07:27:17 2024 
+0200| [baf8651d563bd42b27738e5215c0d822638c0a9b] | committer: Lynne

aacdec: restore arm32 dequantization optimizations

Unintentionally removed as part of 03cf10164578aed33f4d0cb5b69d63669c01a538.
Untested, but it's assumed that unlike most of the old ARM code,
this one was still working.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=baf8651d563bd42b27738e5215c0d822638c0a9b
---

 libavcodec/aac/aacdec_float.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/aac/aacdec_float.c b/libavcodec/aac/aacdec_float.c
index 885d824fa7..03ec264c50 100644
--- a/libavcodec/aac/aacdec_float.c
+++ b/libavcodec/aac/aacdec_float.c
@@ -79,6 +79,11 @@ static const float cce_scale[] = {
 #include "aacdec_tab.h"
 #include "libavutil/intfloat.h"
 
+#include "config.h"
+#if ARCH_ARM
+#include "libavcodec/arm/aac.h"
+#endif
+
 #ifndef VMUL2
 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
const float *scale)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] opustab: macro constant as a double

2024-04-29 Thread Lynne
ffmpeg | branch: master | Lynne  | Mon Apr 29 01:44:46 2024 
+0200| [f492095bd3e22ecf5565a08437a0816910bac949] | committer: Lynne

opustab: macro constant as a double

May increase intermediate precision on some compilers.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f492095bd3e22ecf5565a08437a0816910bac949
---

 libavcodec/opustab.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/opustab.c b/libavcodec/opustab.c
index 47624fe651..917375253e 100644
--- a/libavcodec/opustab.c
+++ b/libavcodec/opustab.c
@@ -1164,7 +1164,7 @@ const uint32_t * const ff_celt_pvq_u_row[15] = {
  * libopus uses a slighly rounded constant, set to 0.85 exactly,
  * to simplify its fixed-point version, but it's not significant to impact
  * compliance. */
-#define CELT_EMPH_COEFF 0.8500061035f
+#define CELT_EMPH_COEFF 0.8500061035
 
 DECLARE_ALIGNED(16, const float, ff_opus_deemph_weights)[] = {
 CELT_EMPH_COEFF,

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] aacdec: avoid generating unused code when either implementation is disabled

2024-04-27 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Apr 24 13:01:14 2024 
+0200| [3390693bfb907765f833766f370e0ba8c7894f44] | committer: Lynne

aacdec: avoid generating unused code when either implementation is disabled

Minor optimization to remove extra branches.
We need to include the header for xHE anyway, which is float-only.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3390693bfb907765f833766f370e0ba8c7894f44
---

 libavcodec/aac/aacdec.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/libavcodec/aac/aacdec.c b/libavcodec/aac/aacdec.c
index 4a29c1b092..72f2d7e7ba 100644
--- a/libavcodec/aac/aacdec.c
+++ b/libavcodec/aac/aacdec.c
@@ -33,6 +33,8 @@
  * for which we need this to be defined for them to work as expected. */
 #define USE_FIXED 1
 
+#include "config_components.h"
+
 #include 
 #include 
 
@@ -1312,9 +1314,9 @@ static void decode_ltp(AACDecContext *ac, 
LongTermPrediction *ltp,
 int sfb;
 
 ltp->lag  = get_bits(gb, 11);
-if (ac->is_fixed)
+if (CONFIG_AAC_FIXED_DECODER && ac->is_fixed)
 ltp->coef_fixed = Q30(ff_ltp_coef[get_bits(gb, 3)]);
-else
+else if (CONFIG_AAC_DECODER)
 ltp->coef = ff_ltp_coef[get_bits(gb, 3)];
 
 for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
@@ -1623,9 +1625,9 @@ static int decode_tns(AACDecContext *ac, 
TemporalNoiseShaping *tns,
 tmp2_idx = 2 * coef_compress + coef_res;
 
 for (i = 0; i < tns->order[w][filt]; i++) {
-if (ac->is_fixed)
+if (CONFIG_AAC_FIXED_DECODER && ac->is_fixed)
 tns->coef_fixed[w][filt][i] = 
Q31(ff_tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)]);
-else
+else if (CONFIG_AAC_DECODER)
 tns->coef[w][filt][i] = 
ff_tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)];
 }
 }
@@ -1974,9 +1976,9 @@ static int decode_extension_payload(AACDecContext *ac, 
GetBitContext *gb, int cn
 ac->avctx->profile = AV_PROFILE_AAC_HE;
 }
 
-if (ac->is_fixed)
+if (CONFIG_AAC_FIXED_DECODER && ac->is_fixed)
 res = ff_aac_sbr_decode_extension_fixed(ac, che, gb, crc_flag, 
cnt, elem_type);
-else
+else if (CONFIG_AAC_DECODER)
 res = ff_aac_sbr_decode_extension(ac, che, gb, crc_flag, cnt, 
elem_type);
 
 
@@ -2087,11 +2089,11 @@ static void spectral_to_sample(AACDecContext *ac, int 
samples)
 ac->dsp.update_ltp(ac, &che->ch[1]);
 }
 if (ac->oc[1].m4ac.sbr > 0) {
-if (ac->is_fixed)
+if (CONFIG_AAC_FIXED_DECODER && ac->is_fixed)
 ff_aac_sbr_apply_fixed(ac, che, type,
(void *)che->ch[0].output,
(void *)che->ch[1].output);
-else
+else if (CONFIG_AAC_DECODER)
 ff_aac_sbr_apply(ac, che, type,
  (void *)che->ch[0].output,
  (void *)che->ch[1].output);
@@ -2550,6 +2552,7 @@ static const AVClass decoder_class = {
 .version= LIBAVUTIL_VERSION_INT,
 };
 
+#if CONFIG_AAC_DECODER
 const FFCodec ff_aac_decoder = {
 .p.name  = "aac",
 CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
@@ -2569,7 +2572,9 @@ const FFCodec ff_aac_decoder = {
 .flush = flush,
 .p.profiles  = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
 };
+#endif
 
+#if CONFIG_AAC_FIXED_DECODER
 const FFCodec ff_aac_fixed_decoder = {
 .p.name  = "aac_fixed",
 CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
@@ -2589,3 +2594,4 @@ const FFCodec ff_aac_fixed_decoder = {
 .p.profiles  = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
 .flush = flush,
 };
+#endif

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] opusdsp: add ability to modify deemphasis constant

2024-04-27 Thread Lynne
ffmpeg | branch: master | Lynne  | Mon Jan 29 04:31:43 2024 
+0100| [134dba9544f4251ebf5fbbae72f2cddc390ac195] | committer: Lynne

opusdsp: add ability to modify deemphasis constant

xHE-AAC relies on the same postfilter mechanism
that Opus uses to improve clarity (albeit with a steeper
deemphasis filter).

The code to apply it is identical, it's still just a
simple IIR low-pass filter. This commit makes it possible
to use alternative constants.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=134dba9544f4251ebf5fbbae72f2cddc390ac195
---

 libavcodec/aarch64/opusdsp_init.c |  2 +-
 libavcodec/aarch64/opusdsp_neon.S | 28 +---
 libavcodec/opusdec_celt.c |  6 --
 libavcodec/opusdsp.c  |  6 --
 libavcodec/opusdsp.h  |  4 +---
 libavcodec/opusenc.c  |  5 +++--
 libavcodec/opustab.c  | 28 
 libavcodec/opustab.h  |  2 ++
 libavcodec/x86/opusdsp.asm|  9 +++--
 libavcodec/x86/opusdsp_init.c |  2 +-
 tests/checkasm/opusdsp.c  |  9 +
 11 files changed, 57 insertions(+), 44 deletions(-)

diff --git a/libavcodec/aarch64/opusdsp_init.c 
b/libavcodec/aarch64/opusdsp_init.c
index bb6d71b66b..a727006593 100644
--- a/libavcodec/aarch64/opusdsp_init.c
+++ b/libavcodec/aarch64/opusdsp_init.c
@@ -23,7 +23,7 @@
 #include "libavcodec/opusdsp.h"
 
 void ff_opus_postfilter_neon(float *data, int period, float *gains, int len);
-float ff_opus_deemphasis_neon(float *out, float *in, float coeff, int len);
+float ff_opus_deemphasis_neon(float *out, float *in, float coeff, const float 
*weights, int len);
 
 av_cold void ff_opus_dsp_init_aarch64(OpusDSP *ctx)
 {
diff --git a/libavcodec/aarch64/opusdsp_neon.S 
b/libavcodec/aarch64/opusdsp_neon.S
index e933151ab4..253825aa61 100644
--- a/libavcodec/aarch64/opusdsp_neon.S
+++ b/libavcodec/aarch64/opusdsp_neon.S
@@ -18,29 +18,11 @@
 
 #include "libavutil/aarch64/asm.S"
 
-   // 0.85..^10.85..^20.85..^30.85..^4
-const tab_st, align=4
-.word 0x3f599a00, 0x3f38f671, 0x3f1d382a, 0x3f05a32f
-endconst
-const tab_x0, align=4
-.word 0x0,0x3f599a00, 0x3f38f671, 0x3f1d382a
-endconst
-const tab_x1, align=4
-.word 0x0,0x0,0x3f599a00, 0x3f38f671
-endconst
-const tab_x2, align=4
-.word 0x0,0x0,0x0,0x3f599a00
-endconst
-
 function ff_opus_deemphasis_neon, export=1
-movrel  x4, tab_st
-ld1 {v4.4s}, [x4]
-movrel  x4, tab_x0
-ld1 {v5.4s}, [x4]
-movrel  x4, tab_x1
-ld1 {v6.4s}, [x4]
-movrel  x4, tab_x2
-ld1 {v7.4s}, [x4]
+ld1 {v4.4s}, [x2], #16
+ld1 {v5.4s}, [x2], #16
+ld1 {v6.4s}, [x2], #16
+ld1 {v7.4s}, [x2]
 
 fmulv0.4s, v4.4s, v0.s[0]
 
@@ -63,7 +45,7 @@ function ff_opus_deemphasis_neon, export=1
 st1 {v1.4s, v2.4s}, [x0], #32
 fmulv0.4s, v4.4s, v2.s[3]
 
-subsw2, w2, #8
+subsw3, w3, #8
 b.gt1b
 
 mov s0, v2.s[3]
diff --git a/libavcodec/opusdec_celt.c b/libavcodec/opusdec_celt.c
index fd8e9929e9..b19342337d 100644
--- a/libavcodec/opusdec_celt.c
+++ b/libavcodec/opusdec_celt.c
@@ -460,7 +460,9 @@ int ff_celt_decode_frame(CeltFrame *f, OpusRangeCoder *rc,
 /* deemphasis */
 block->emph_coeff = f->opusdsp.deemphasis(output[i],
   &block->buf[1024 - 
frame_size],
-  block->emph_coeff, 
frame_size);
+  block->emph_coeff,
+  ff_opus_deemph_weights,
+  frame_size);
 }
 
 if (channels == 1)
@@ -516,7 +518,7 @@ void ff_celt_flush(CeltFrame *f)
  * a lesser discontinuity when seeking.
  * The deemphasis functions differ from libopus in that they require
  * an initial state divided by the coefficient. */
-block->emph_coeff = 0.0f / CELT_EMPH_COEFF;
+block->emph_coeff = 0.0f / ff_opus_deemph_weights[0];
 }
 f->seed = 0;
 
diff --git a/libavcodec/opusdsp.c b/libavcodec/opusdsp.c
index 0764d712e4..e61cc36098 100644
--- a/libavcodec/opusdsp.c
+++ b/libavcodec/opusdsp.c
@@ -18,6 +18,7 @@
 
 #include "config.h"
 #include "libavutil/attributes.h"
+#include "libavutil/mem_internal.h"
 #include "opusdsp.h"
 
 static void postfilter_c(float *data, int period, float *gains, int len)
@@ -43,10 +44,11 @@ static void postfilter_c(float *data, int period, float 
*gains, int len)
 }
 }
 
-static float deemphasis_c(

[FFmpeg-cvslog] aacsbr: constify the only SBR table

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Mar 22 06:43:16 2024 
+0100| [176c922e4edac82df2c8ea508f81d2aba29c6468] | committer: Lynne

aacsbr: constify the only SBR table

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=176c922e4edac82df2c8ea508f81d2aba29c6468
---

 libavcodec/aacsbr_template.c |  10 ---
 libavcodec/aacsbrdata.h  | 164 ++-
 2 files changed, 163 insertions(+), 11 deletions(-)

diff --git a/libavcodec/aacsbr_template.c b/libavcodec/aacsbr_template.c
index 066edbc199..a6e3bc54bd 100644
--- a/libavcodec/aacsbr_template.c
+++ b/libavcodec/aacsbr_template.c
@@ -49,18 +49,8 @@ static inline SpectralBandReplication 
*get_sbr(ChannelElement *ch)
 return &((ExtChannelElement*)ch)->sbr;
 }
 
-static av_cold void aacsbr_tableinit(void)
-{
-int n;
-
-for (n = 0; n < 320; n++)
-sbr_qmf_window_ds[n] = sbr_qmf_window_us[2*n];
-}
-
 av_cold void AAC_RENAME(ff_aac_sbr_init)(void)
 {
-aacsbr_tableinit();
-
 AAC_RENAME(ff_ps_init)();
 }
 
diff --git a/libavcodec/aacsbrdata.h b/libavcodec/aacsbrdata.h
index b0585309e0..9c25098240 100644
--- a/libavcodec/aacsbrdata.h
+++ b/libavcodec/aacsbrdata.h
@@ -42,7 +42,169 @@ static const int8_t sbr_offset[6][16] = {
 };
 
 ///< window coefficients for analysis/synthesis QMF banks
-static DECLARE_ALIGNED(32, INTFLOAT, sbr_qmf_window_ds)[320];
+static const DECLARE_ALIGNED(32, INTFLOAT, sbr_qmf_window_ds)[320] = {
+ Q31( 0.00f), Q31(-0.0005617692f),
+ Q31(-0.0004875227f), Q31(-0.0005040714f),
+ Q31(-0.0005466565f), Q31(-0.0005870930f),
+ Q31(-0.0006312493f), Q31(-0.0006777690f),
+ Q31(-0.0007157736f), Q31(-0.0007440941f),
+ Q31(-0.0007681371f), Q31(-0.0007834332f),
+ Q31(-0.0007803664f), Q31(-0.0007757977f),
+ Q31(-0.0007530001f), Q31(-0.0007215391f),
+ Q31(-0.0006650415f), Q31(-0.0005946118f),
+ Q31(-0.0005145572f), Q31(-0.0004095121f),
+ Q31(-0.0002896981f), Q31(-0.0001446380f),
+ Q31( 0.0000134949f), Q31( 0.0002043017f),
+ Q31( 0.0004026540f), Q31( 0.0006239376f),
+ Q31( 0.0008608443f), Q31( 0.0011250155f),
+ Q31( 0.0013902494f), Q31( 0.0016868083f),
+ Q31( 0.0019841140f), Q31( 0.0023017254f),
+ Q31( 0.0026201758f), Q31( 0.0029469447f),
+ Q31( 0.0032739613f), Q31( 0.0036008268f),
+ Q31( 0.0039207432f), Q31( 0.0042264269f),
+ Q31( 0.0045209852f), Q31( 0.0047932560f),
+ Q31( 0.0050393022f), Q31( 0.0052461166f),
+ Q31( 0.0054196775f), Q31( 0.0055475714f),
+ Q31( 0.0056220643f), Q31( 0.0056389199f),
+ Q31( 0.0055917128f), Q31( 0.0054753783f),
+ Q31( 0.0052715758f), Q31( 0.0049839687f),
+ Q31( 0.0046039530f), Q31( 0.0041251642f),
+ Q31( 0.0035401246f), Q31( 0.0028446757f),
+ Q31( 0.0020274176f), Q31( 0.0010902329f),
+ Q31( 0.0000276045f), Q31(-0.0011568135f),
+ Q31(-0.0024826723f), Q31(-0.0039401124f),
+ Q31(-0.0055337211f), Q31(-0.0072615816f),
+ Q31(-0.0091325329f), Q31(-0.0111315548f),
+ Q31( 0.0132718220f), Q31( 0.0155405553f),
+ Q31( 0.0179433381f), Q31( 0.0204531793f),
+ Q31( 0.0230680169f), Q31( 0.0257875847f),
+ Q31( 0.0286072173f), Q31( 0.0315017608f),
+ Q31( 0.0344620948f), Q31( 0.0374812850f),
+ Q31( 0.0405349170f), Q31( 0.0436097542f),
+ Q31( 0.0466843027f), Q31( 0.0497385755f),
+ Q31( 0.0527630746f), Q31( 0.0557173648f),
+ Q31( 0.0585915683f), Q31( 0.0613455171f),
+ Q31( 0.0639715898f), Q31( 0.0664367512f),
+ Q31( 0.0687043828f), Q31( 0.0707628710f),
+ Q31( 0.0725682583f), Q31( 0.0741003642f),
+ Q31( 0.0753137336f), Q31( 0.0761992479f),
+ Q31( 0.0767093490f), Q31( 0.0768230011f),
+ Q31( 0.0765050718f), Q31( 0.0757305756f),
+ Q31( 0.0744664394f), Q31( 0.0726774642f),
+ Q31( 0.0703533073f), Q31( 0.0674525021f),
+ Q31( 0.0639444805f), Q31( 0.0598166570f),
+ Q31( 0.0550460034f), Q31( 0.0495978676f),
+ Q31( 0.0434768782f), Q31( 0.0366418116f),
+ Q31( 0.0290824006f), Q31( 0.0207997072f),
+ Q31( 0.0117623832f), Q31( 0.0019765601f),
+ Q31(-0.0085711749f), Q31(-0.0198834129f),
+ Q31(-0.0319531274f), Q31(-0.0447806821f),
+ Q31(-0.0583705326f), Q31(-0.0726943300f),
+ Q31(-0.0877547536f), Q31(-0.1035329531f),
+ Q31(-0.1200077984f), Q31(-0.1371551761f),
+ Q31(-0.1549607071f), Q31(-0.1733808172f),
+ Q31(-0.1923966745f), Q31(-0.2119735853f),
+ Q31(-0.2320690870f), Q31(-0.2526480309f),
+ Q31(-0.2736634040f), Q31(-0.2950716717f),
+ Q31(-0.3168278913f), Q31(-0.3388722693f),
+ Q31( 0.3611589903f), Q31( 0.3836350013f),
+ Q31( 0.4062317676f), Q31( 0.4289119920f),
+ Q31( 0.4515996535f), Q31( 0.4742453214f),
+ Q31( 0.4967708254f), Q31( 0.5191234970f),
+ Q31( 0.5412553448f), Q31( 0.5630789140f),
+ Q31( 0.5845403235f), Q31( 0.6055783538f),
+ Q31( 0.6261242695f), Q31( 0.6461269695f),
+ Q31( 0.6655139880f), Q31( 0.6842353293f),
+ Q31( 0.7022388719f), Q31( 0.7194462634f),
+ Q31( 0.7

[FFmpeg-cvslog] aacdec: initialize float/fixed SBR tables only when either is needed

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Mar 22 07:13:08 2024 
+0100| [e3650886c7e1df3301ec2011a02147c3e32e33ad] | committer: Lynne

aacdec: initialize float/fixed SBR tables only when either is needed

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e3650886c7e1df3301ec2011a02147c3e32e33ad
---

 libavcodec/aac/aacdec.c   | 29 +++--
 libavcodec/aac/aacdec_fixed.c |  3 +++
 libavcodec/aac/aacdec_float.c |  3 +++
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/libavcodec/aac/aacdec.c b/libavcodec/aac/aacdec.c
index 01a10468fa..4a29c1b092 100644
--- a/libavcodec/aac/aacdec.c
+++ b/libavcodec/aac/aacdec.c
@@ -29,6 +29,10 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+/* We use several quantization functions here (Q31, Q30),
+ * for which we need this to be defined for them to work as expected. */
+#define USE_FIXED 1
+
 #include 
 #include 
 
@@ -184,8 +188,8 @@ static int frame_configure_elements(AVCodecContext *avctx)
 for (id = 0; id < MAX_ELEM_ID; id++) {
 ChannelElement *che = ac->che[type][id];
 if (che) {
-che->ch[0].AAC_RENAME(output) = che->ch[0].AAC_RENAME(ret_buf);
-che->ch[1].AAC_RENAME(output) = che->ch[1].AAC_RENAME(ret_buf);
+che->ch[0].output = che->ch[0].ret_buf;
+che->ch[1].output = che->ch[1].ret_buf;
 }
 }
 }
@@ -202,7 +206,7 @@ static int frame_configure_elements(AVCodecContext *avctx)
 /* map output channel pointers to AVFrame data */
 for (ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
 if (ac->output_element[ch])
-ac->output_element[ch]->AAC_RENAME(output) = (INTFLOAT 
*)ac->frame->extended_data[ch];
+ac->output_element[ch]->output = (void 
*)ac->frame->extended_data[ch];
 }
 
 return 0;
@@ -1107,9 +1111,6 @@ static int sample_rate_idx (int rate)
 
 static av_cold void aac_static_table_init(void)
 {
-ff_aac_sbr_init();
-ff_aac_sbr_init_fixed();
-
 ff_aacdec_common_init_once();
 }
 static AVOnce aac_table_init = AV_ONCE_INIT;
@@ -1118,8 +1119,8 @@ static av_cold int decode_close(AVCodecContext *avctx)
 {
 AACDecContext *ac = avctx->priv_data;
 int is_fixed = ac->is_fixed;
-void (*sbr_close)(ChannelElement *che) = is_fixed ? 
RENAME_FIXED(ff_aac_sbr_ctx_close)
-  : ff_aac_sbr_ctx_close;
+void (*sbr_close)(ChannelElement *che) = is_fixed ? 
ff_aac_sbr_ctx_close_fixed :
+ff_aac_sbr_ctx_close;
 
 for (int type = 0; type < FF_ARRAY_ELEMS(ac->che); type++) {
 for (int i = 0; i < MAX_ELEM_ID; i++) {
@@ -2069,10 +2070,10 @@ static void spectral_to_sample(AACDecContext *ac, int 
samples)
 }
 }
 if (che->ch[0].tns.present)
-ac->dsp.apply_tns(che->ch[0].AAC_RENAME(coeffs),
+ac->dsp.apply_tns(che->ch[0].coeffs,
   &che->ch[0].tns, &che->ch[0].ics, 1);
 if (che->ch[1].tns.present)
-ac->dsp.apply_tns(che->ch[1].AAC_RENAME(coeffs),
+ac->dsp.apply_tns(che->ch[1].coeffs,
   &che->ch[1].tns, &che->ch[1].ics, 1);
 if (type <= TYPE_CPE)
 apply_channel_coupling(ac, che, type, i, 
BETWEEN_TNS_AND_IMDCT, ac->dsp.apply_dependent_coupling);
@@ -2088,12 +2089,12 @@ static void spectral_to_sample(AACDecContext *ac, int 
samples)
 if (ac->oc[1].m4ac.sbr > 0) {
 if (ac->is_fixed)
 ff_aac_sbr_apply_fixed(ac, che, type,
-   
che->ch[0].AAC_RENAME(output),
-   
che->ch[1].AAC_RENAME(output));
+   (void *)che->ch[0].output,
+   (void *)che->ch[1].output);
 else
 ff_aac_sbr_apply(ac, che, type,
- che->ch[0].AAC_RENAME(output),
- che->ch[1].AAC_RENAME(output));
+ (void *)che->ch[0].output,
+ (void *)che->ch[1].output);
 }
 }
 if (type <= TYPE_CCE)
diff --git a/libavcodec/aac/aacdec_fixed.c b/libavcodec/aac/aacdec_fixed.c
index a65ba19137..92204180a1 100644
--- a/libavcodec/aac/aacdec_fixed.c
+++ b/libavcodec/aac/aacdec_fixed.c
@@ -41,6 +41,7 @@
 #include "libavcodec/sinewi

[FFmpeg-cvslog] aacdec: move aacdec.h into libavcodec/aac

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Tue Mar 19 20:19:06 2024 
+0100| [551ce16b59b109093516e2f4000ae809fcd0b9f3] | committer: Lynne

aacdec: move aacdec.h into libavcodec/aac

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=551ce16b59b109093516e2f4000ae809fcd0b9f3
---

 libavcodec/aac/aacdec.c|  2 +-
 libavcodec/{ => aac}/aacdec.h  | 10 +-
 libavcodec/aac/aacdec_dsp_template.c   |  2 +-
 libavcodec/aac/aacdec_fixed.c  |  2 +-
 libavcodec/aac/aacdec_fixed_coupling.h |  2 +-
 libavcodec/aac/aacdec_float.c  |  2 +-
 libavcodec/aac/aacdec_float_coupling.h |  2 +-
 libavcodec/aacdec.c|  2 +-
 libavcodec/aacsbr.h|  2 +-
 libavcodec/aacsbr_template.c   |  2 +-
 10 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/libavcodec/aac/aacdec.c b/libavcodec/aac/aacdec.c
index 3af0e808fd..dfbc309583 100644
--- a/libavcodec/aac/aacdec.c
+++ b/libavcodec/aac/aacdec.c
@@ -34,7 +34,7 @@
 
 #include "libavcodec/aac.h"
 #include "libavcodec/aacsbr.h"
-#include "libavcodec/aacdec.h"
+#include "aacdec.h"
 #include "libavcodec/avcodec.h"
 #include "libavutil/attributes.h"
 #include "libavutil/error.h"
diff --git a/libavcodec/aacdec.h b/libavcodec/aac/aacdec.h
similarity index 98%
rename from libavcodec/aacdec.h
rename to libavcodec/aac/aacdec.h
index 3d15cef453..c8107f6bce 100644
--- a/libavcodec/aacdec.h
+++ b/libavcodec/aac/aacdec.h
@@ -27,8 +27,8 @@
  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  */
 
-#ifndef AVCODEC_AACDEC_H
-#define AVCODEC_AACDEC_H
+#ifndef AVCODEC_AAC_AACDEC_H
+#define AVCODEC_AAC_AACDEC_H
 
 #include 
 
@@ -38,8 +38,8 @@
 #include "libavutil/mem_internal.h"
 #include "libavutil/tx.h"
 
-#include "aac.h"
-#include "mpeg4audio.h"
+#include "libavcodec/aac.h"
+#include "libavcodec/mpeg4audio.h"
 
 typedef struct AACDecContext AACDecContext;
 
@@ -349,4 +349,4 @@ void ff_aacdec_init_mips(AACDecContext *c);
 int ff_aac_decode_ics(AACDecContext *ac, SingleChannelElement *sce,
   GetBitContext *gb, int common_window, int scale_flag);
 
-#endif /* AVCODEC_AACDEC_H */
+#endif /* AVCODEC_AAC_AACDEC_H */
diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index f260d32e4a..a42b40f674 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -29,7 +29,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavcodec/aacdec.h"
+#include "aacdec.h"
 #include "libavcodec/lpc_functions.h"
 
 #include "libavcodec/aactab.h"
diff --git a/libavcodec/aac/aacdec_fixed.c b/libavcodec/aac/aacdec_fixed.c
index d706cfcc92..a65ba19137 100644
--- a/libavcodec/aac/aacdec_fixed.c
+++ b/libavcodec/aac/aacdec_fixed.c
@@ -36,7 +36,7 @@
 #include "libavcodec/aac_defines.h"
 
 #include "libavcodec/avcodec.h"
-#include "libavcodec/aacdec.h"
+#include "aacdec.h"
 #include "libavcodec/aactab.h"
 #include "libavcodec/sinewin_fixed_tablegen.h"
 #include "libavcodec/kbdwin.h"
diff --git a/libavcodec/aac/aacdec_fixed_coupling.h 
b/libavcodec/aac/aacdec_fixed_coupling.h
index b6a54b35bd..add4cd69da 100644
--- a/libavcodec/aac/aacdec_fixed_coupling.h
+++ b/libavcodec/aac/aacdec_fixed_coupling.h
@@ -32,7 +32,7 @@
 #ifndef AVCODEC_AAC_AACDEC_FIXED_COUPLING_H
 #define AVCODEC_AAC_AACDEC_FIXED_COUPLING_H
 
-#include "libavcodec/aacdec.h"
+#include "aacdec.h"
 
 /**
  * Apply dependent channel coupling (applied before IMDCT).
diff --git a/libavcodec/aac/aacdec_float.c b/libavcodec/aac/aacdec_float.c
index 6801085098..f0e69579f5 100644
--- a/libavcodec/aac/aacdec_float.c
+++ b/libavcodec/aac/aacdec_float.c
@@ -36,7 +36,7 @@
 #include "libavcodec/aac_defines.h"
 
 #include "libavcodec/avcodec.h"
-#include "libavcodec/aacdec.h"
+#include "aacdec.h"
 #include "libavcodec/aactab.h"
 #include "libavcodec/sinewin.h"
 #include "libavcodec/kbdwin.h"
diff --git a/libavcodec/aac/aacdec_float_coupling.h 
b/libavcodec/aac/aacdec_float_coupling.h
index 475d987a66..50ad76eda2 100644
--- a/libavcodec/aac/aacdec_float_coupling.h
+++ b/libavcodec/aac/aacdec_float_coupling.h
@@ -32,7 +32,7 @@
 #ifndef AVCODEC_AAC_AACDEC_FLOAT_COUPLING_H
 #define AVCODEC_AAC_AACDEC_FLOAT_COUPLING_H
 
-#include "libavcodec/aacdec.h"
+#include "aacdec.h"
 
 /**
  * Apply dependent channel coupling (applied before IMDCT).
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 6542f8e527..666e0c89db 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -42,7 +42,7 @@
 #include "sinewin.h"
 
 #include "aac.h"
-#include "aacdec.h"

[FFmpeg-cvslog] aacdec: move LATM decode functions into a separate file

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 23:34:46 2024 
+0100| [ce740618d194e6c8523466ba15be2d662da37105] | committer: Lynne

aacdec: move LATM decode functions into a separate file

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ce740618d194e6c8523466ba15be2d662da37105
---

 libavcodec/aac/aacdec_latm.h | 350 +++
 libavcodec/aacdec.c  | 315 +-
 2 files changed, 351 insertions(+), 314 deletions(-)

diff --git a/libavcodec/aac/aacdec_latm.h b/libavcodec/aac/aacdec_latm.h
new file mode 100644
index 0000000000..0226aebba4
--- /dev/null
+++ b/libavcodec/aac/aacdec_latm.h
@@ -0,0 +1,350 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse 
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall 
+ * Copyright (c) 2010  Janne Grunau 
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ *  MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_LATM_H
+#define AVCODEC_AAC_AACDEC_LATM_H
+
+#define LOAS_SYNC_WORD   0x2b7   ///< 11 bits LOAS sync word
+
+struct LATMContext {
+AACDecContext aac_ctx;  ///< containing AACContext
+int initialized;///< initialized after a valid extradata was seen
+
+// parser data
+int audio_mux_version_A; ///< LATM syntax version
+int frame_length_type;   ///< 0/1 variable/fixed frame length
+int frame_length;///< frame length for fixed frame length
+};
+
+static inline uint32_t latm_get_value(GetBitContext *b)
+{
+int length = get_bits(b, 2);
+
+return get_bits_long(b, (length+1)*8);
+}
+
+static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
+ GetBitContext *gb, int asclen)
+{
+AACDecContext *ac = &latmctx->aac_ctx;
+AVCodecContext *avctx = ac->avctx;
+MPEG4AudioConfig m4ac = { 0 };
+GetBitContext gbc;
+int config_start_bit  = get_bits_count(gb);
+int sync_extension= 0;
+int bits_consumed, esize, i;
+
+if (asclen > 0) {
+sync_extension = 1;
+asclen = FFMIN(asclen, get_bits_left(gb));
+init_get_bits(&gbc, gb->buffer, config_start_bit + asclen);
+skip_bits_long(&gbc, config_start_bit);
+} else if (asclen == 0) {
+gbc = *gb;
+} else {
+return AVERROR_INVALIDDATA;
+}
+
+if (get_bits_left(gb) <= 0)
+return AVERROR_INVALIDDATA;
+
+bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
+&gbc, config_start_bit,
+sync_extension);
+
+if (bits_consumed < config_start_bit)
+return AVERROR_INVALIDDATA;
+bits_consumed -= config_start_bit;
+
+if (asclen == 0)
+  asclen = bits_consumed;
+
+if (!latmctx->initialized ||
+ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
+ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
+
+if (latmctx->initialized) {
+av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, 
chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
+} else {
+av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
+}
+latmctx->initialized = 0;
+
+esize = (asclen + 7) / 8;
+
+if (avctx->extradata_size < esize) {
+av_free(avctx->extradata);
+avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
+if (!avctx->extradata)
+return AVERROR(ENOMEM);
+}
+
+avctx->extradata_size = esize;
+gbc = *gb;
+for (i = 0; i < esize; i++) {
+  avctx->extradata[i] = get_bits(&gbc, 8);
+}
+memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+}
+skip_bits_long(gb, asclen);
+
+return 0;
+}
+
+static int read_stream_mux_config(struct LATMContext *latmctx,
+   

[FFmpeg-cvslog] aacdec: fully detemplate decoder core

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 06:27:39 2024 
+0100| [e93793bf3cf15968c34b0e7bf0c677fad3032f5d] | committer: Lynne

aacdec: fully detemplate decoder core

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e93793bf3cf15968c34b0e7bf0c677fad3032f5d
---

 libavcodec/Makefile   |   2 +-
 libavcodec/aac/aacdec_proc_template.c |   2 +-
 libavcodec/aacdec.c   |  22 +++-
 libavcodec/aacdec.h   |   2 -
 libavcodec/aacdec_fixed.c | 102 --
 libavcodec/aacdec_template.c  |  78 +++---
 libavcodec/aacsbr.h   |  22 +---
 7 files changed, 95 insertions(+), 135 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index f2da83c8eb..53c628f09c 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -184,7 +184,7 @@ OBJS-$(CONFIG_AAC_DECODER) += aacdec.o aactab.o 
\
   aacsbr.o aacps_common.o 
aacps_float.o \
   kbdwin.o \
   sbrdsp.o aacpsdsp_float.o cbrt_data.o
-OBJS-$(CONFIG_AAC_FIXED_DECODER)   += aacdec_fixed.o aactab.o \
+OBJS-$(CONFIG_AAC_FIXED_DECODER)   += aactab.o \
   aacsbr_fixed.o aacps_common.o 
aacps_fixed.o \
   kbdwin.o \
   sbrdsp_fixed.o aacpsdsp_fixed.o 
cbrt_data_fixed.o
diff --git a/libavcodec/aac/aacdec_proc_template.c 
b/libavcodec/aac/aacdec_proc_template.c
index 609aa2fcc5..319bf61993 100644
--- a/libavcodec/aac/aacdec_proc_template.c
+++ b/libavcodec/aac/aacdec_proc_template.c
@@ -385,7 +385,7 @@ static int AAC_RENAME(decode_cce)(AACDecContext *ac, 
GetBitContext *gb, ChannelE
 scale = cce_scale[get_bits(gb, 2)];
 #endif
 
-if ((ret = AAC_RENAME(ff_aac_decode_ics)(ac, sce, gb, 0, 0)))
+if ((ret = ff_aac_decode_ics(ac, sce, gb, 0, 0)))
 return ret;
 
 for (c = 0; c < num_gain; c++) {
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index cc2b9bedfb..1e7bdb6416 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -32,7 +32,7 @@
  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  */
 
-#define USE_FIXED 0
+#define USE_FIXED 1 // aacsbr.h breaks without this
 
 #include "libavutil/float_dsp.h"
 #include "avcodec.h"
@@ -399,3 +399,23 @@ const FFCodec ff_aac_latm_decoder = {
 .flush = flush,
 .p.profiles  = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
 };
+
+const FFCodec ff_aac_fixed_decoder = {
+.p.name  = "aac_fixed",
+CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
+.p.type  = AVMEDIA_TYPE_AUDIO,
+.p.id= AV_CODEC_ID_AAC,
+.p.priv_class= _aac_decoder_class,
+.priv_data_size  = sizeof(AACDecContext),
+.init= aac_decode_init_fixed,
+.close   = ff_aac_decode_close,
+FF_CODEC_DECODE_CB(aac_decode_frame),
+.p.sample_fmts   = (const enum AVSampleFormat[]) {
+AV_SAMPLE_FMT_S32P, AV_SAMPLE_FMT_NONE
+},
+.p.capabilities  = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
+.caps_internal   = FF_CODEC_CAP_INIT_CLEANUP,
+.p.ch_layouts= ff_aac_ch_layout,
+.p.profiles  = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
+.flush = flush,
+};
diff --git a/libavcodec/aacdec.h b/libavcodec/aacdec.h
index 2e3ee961b0..3d15cef453 100644
--- a/libavcodec/aacdec.h
+++ b/libavcodec/aacdec.h
@@ -348,7 +348,5 @@ void ff_aacdec_init_mips(AACDecContext *c);
 
 int ff_aac_decode_ics(AACDecContext *ac, SingleChannelElement *sce,
   GetBitContext *gb, int common_window, int scale_flag);
-int ff_aac_decode_ics_fixed(AACDecContext *ac, SingleChannelElement *sce,
-GetBitContext *gb, int common_window, int 
scale_flag);
 
 #endif /* AVCODEC_AACDEC_H */
diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
deleted file mode 100644
index 91ec616644..0000000000
--- a/libavcodec/aacdec_fixed.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2013
- *  MIPS Technologies, Inc., California.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *notice, this list of conditions and the following disclaimer in the
- *documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
- *contributors may be used to endorse or promote products derived from
- *this software without specific prior written permission.

[FFmpeg-cvslog] aacdec: move prediction to separate files

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 05:16:50 2024 
+0100| [49e7be1e370a52d5ad2bc52830a2448384b5a58c] | committer: Lynne

aacdec: move prediction to separate files

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=49e7be1e370a52d5ad2bc52830a2448384b5a58c
---

 libavcodec/aac/aacdec_dsp_template.c |  48 ++
 libavcodec/aac/aacdec_fixed.c|   1 +
 libavcodec/aac/aacdec_fixed_prediction.h | 151 +++
 libavcodec/aac/aacdec_float.c|   1 +
 libavcodec/aac/aacdec_float_prediction.h | 100 
 libavcodec/aacdec.c  |  65 -
 libavcodec/aacdec.h  |   2 +
 libavcodec/aacdec_fixed.c| 116 
 libavcodec/aacdec_template.c |  52 +--
 9 files changed, 306 insertions(+), 230 deletions(-)

diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index adcafa10e7..338e512ed2 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -569,6 +569,52 @@ static void AAC_RENAME(clip_output)(AACDecContext *ac, 
ChannelElement *che,
 #endif
 }
 
+static inline void reset_all_predictors(PredictorState *ps)
+{
+int i;
+for (i = 0; i < MAX_PREDICTORS; i++)
+reset_predict_state(&ps[i]);
+}
+
+static inline void reset_predictor_group(PredictorState *ps, int group_num)
+{
+int i;
+for (i = group_num - 1; i < MAX_PREDICTORS; i += 30)
+reset_predict_state(&ps[i]);
+}
+
+/**
+ * Apply AAC-Main style frequency domain prediction.
+ */
+static void AAC_RENAME(apply_prediction)(AACDecContext *ac, 
SingleChannelElement *sce)
+{
+int sfb, k;
+
+if (!sce->ics.predictor_initialized) {
+reset_all_predictors(sce->AAC_RENAME(predictor_state));
+sce->ics.predictor_initialized = 1;
+}
+
+if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
+for (sfb = 0;
+ sfb < ff_aac_pred_sfb_max[ac->oc[1].m4ac.sampling_index];
+ sfb++) {
+for (k = sce->ics.swb_offset[sfb];
+ k < sce->ics.swb_offset[sfb + 1];
+ k++) {
+predict(&sce->AAC_RENAME(predictor_state)[k],
+&sce->AAC_RENAME(coeffs)[k],
+sce->ics.predictor_present &&
+sce->ics.prediction_used[sfb]);
+}
+}
+if (sce->ics.predictor_reset_group)
+reset_predictor_group(sce->AAC_RENAME(predictor_state),
+  sce->ics.predictor_reset_group);
+} else
+reset_all_predictors(sce->AAC_RENAME(predictor_state));
+}
+
 const AACDecDSP AAC_RENAME(aac_dsp) = {
 .init_tables = &AAC_RENAME(init_tables),
 
@@ -579,6 +625,8 @@ const AACDecDSP AAC_RENAME(aac_dsp) = {
 .apply_ltp = &AAC_RENAME(apply_ltp),
 .update_ltp = &AAC_RENAME(update_ltp),
 
+.apply_prediction = AAC_RENAME(apply_prediction),
+
 .imdct_and_windowing = AAC_RENAME(imdct_and_windowing),
 .imdct_and_windowing_960 = AAC_RENAME(imdct_and_windowing_960),
 .imdct_and_windowing_ld = AAC_RENAME(imdct_and_windowing_ld),
diff --git a/libavcodec/aac/aacdec_fixed.c b/libavcodec/aac/aacdec_fixed.c
index dc3e8eaab6..41f25d8148 100644
--- a/libavcodec/aac/aacdec_fixed.c
+++ b/libavcodec/aac/aacdec_fixed.c
@@ -79,5 +79,6 @@ static const int cce_scale_fixed[8] = {
 #include "aacdec_fixed_dequant.h"
 
 #include "aacdec_fixed_coupling.h"
+#include "aacdec_fixed_prediction.h"
 #include "aacdec_dsp_template.c"
 #include "aacdec_proc_template.c"
diff --git a/libavcodec/aac/aacdec_fixed_prediction.h 
b/libavcodec/aac/aacdec_fixed_prediction.h
new file mode 100644
index 0000000000..6fb3354865
--- /dev/null
+++ b/libavcodec/aac/aacdec_fixed_prediction.h
@@ -0,0 +1,151 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse 
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall 
+ * Copyright (c) 2010  Janne Grunau 
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ *  MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Les

[FFmpeg-cvslog] aacdec: move fixed/float DSP initialization to templated init functions

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 06:05:45 2024 
+0100| [2f90d8398148b04db31e197f49154240c36e1849] | committer: Lynne

aacdec: move fixed/float DSP initialization to templated init functions

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2f90d8398148b04db31e197f49154240c36e1849
---

 libavcodec/aac/aacdec.c  |  4 +---
 libavcodec/aac/aacdec_dsp_template.c |  2 +-
 libavcodec/aac/aacdec_fixed.c| 10 +-
 libavcodec/aac/aacdec_float.c| 16 +++-
 libavcodec/aacdec.h  |  2 +-
 libavcodec/aacdec_template.c | 35 ---
 6 files changed, 31 insertions(+), 38 deletions(-)

diff --git a/libavcodec/aac/aacdec.c b/libavcodec/aac/aacdec.c
index d31c64d08d..3af0e808fd 100644
--- a/libavcodec/aac/aacdec.c
+++ b/libavcodec/aac/aacdec.c
@@ -124,9 +124,7 @@ av_cold int ff_aac_decode_init_common(AVCodecContext *avctx)
 ac->dsp = is_fixed ? aac_dsp_fixed : aac_dsp;
 ac->proc = is_fixed ? aac_proc_fixed : aac_proc;
 
-ac->dsp.init_tables();
-
-return 0;
+return ac->dsp.init(ac);
 }
 
 #define AACDEC_FLAGS AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index 338e512ed2..f260d32e4a 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -616,7 +616,7 @@ static void AAC_RENAME(apply_prediction)(AACDecContext *ac, 
SingleChannelElement
 }
 
 const AACDecDSP AAC_RENAME(aac_dsp) = {
-.init_tables = &AAC_RENAME(init_tables),
+.init = &AAC_RENAME(init),
 
 .dequant_scalefactors = &AAC_RENAME(dequant_scalefactors),
 .apply_mid_side_stereo = &AAC_RENAME(apply_mid_side_stereo),
diff --git a/libavcodec/aac/aacdec_fixed.c b/libavcodec/aac/aacdec_fixed.c
index 41f25d8148..d706cfcc92 100644
--- a/libavcodec/aac/aacdec_fixed.c
+++ b/libavcodec/aac/aacdec_fixed.c
@@ -35,6 +35,8 @@
 
 #include "libavcodec/aac_defines.h"
 
+#include "libavcodec/avcodec.h"
+#include "libavcodec/aacdec.h"
 #include "libavcodec/aactab.h"
 #include "libavcodec/sinewin_fixed_tablegen.h"
 #include "libavcodec/kbdwin.h"
@@ -58,10 +60,16 @@ static void init_tables_fixed_fn(void)
 init_sine_windows_fixed();
 }
 
-static void init_tables_fixed(void)
+static int init_fixed(AACDecContext *ac)
 {
 static AVOnce init_fixed_once = AV_ONCE_INIT;
 ff_thread_once(&init_fixed_once, init_tables_fixed_fn);
+
+ac->fdsp = avpriv_alloc_fixed_dsp(ac->avctx->flags & 
AV_CODEC_FLAG_BITEXACT);
+if (!ac->fdsp)
+return AVERROR(ENOMEM);
+
+return 0;
 }
 
 static const int cce_scale_fixed[8] = {
diff --git a/libavcodec/aac/aacdec_float.c b/libavcodec/aac/aacdec_float.c
index 73aaa72f68..6801085098 100644
--- a/libavcodec/aac/aacdec_float.c
+++ b/libavcodec/aac/aacdec_float.c
@@ -35,6 +35,8 @@
 
 #include "libavcodec/aac_defines.h"
 
+#include "libavcodec/avcodec.h"
+#include "libavcodec/aacdec.h"
 #include "libavcodec/aactab.h"
 #include "libavcodec/sinewin.h"
 #include "libavcodec/kbdwin.h"
@@ -61,10 +63,22 @@ static void init_tables_float_fn(void)
 AAC_RENAME(ff_init_ff_sine_windows)(9);
 }
 
-static void init_tables(void)
+static int init(AACDecContext *ac)
 {
 static AVOnce init_float_once = AV_ONCE_INIT;
 ff_thread_once(&init_float_once, init_tables_float_fn);
+
+ac->fdsp = avpriv_float_dsp_alloc(ac->avctx->flags & 
AV_CODEC_FLAG_BITEXACT);
+if (!ac->fdsp)
+return AVERROR(ENOMEM);
+
+ff_aac_float_common_init();
+
+#if ARCH_MIPS
+ff_aacdec_init_mips(ac);
+#endif
+
+return 0;
 }
 
 static const float cce_scale[] = {
diff --git a/libavcodec/aacdec.h b/libavcodec/aacdec.h
index 2a997823ee..2e3ee961b0 100644
--- a/libavcodec/aacdec.h
+++ b/libavcodec/aacdec.h
@@ -216,7 +216,7 @@ typedef struct AACDecProc {
  * DSP-specific primitives
  */
 typedef struct AACDecDSP {
-void (*init_tables)(void);
+int (*init)(AACDecContext *ac);
 
 void (*dequant_scalefactors)(SingleChannelElement *sce);
 
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 30ec914520..ad40c0ca09 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1088,18 +1088,11 @@ static int sample_rate_idx (int rate)
 elsereturn 11;
 }
 
-static void aacdec_init(AACDecContext *ac);
-
 static av_cold void aac_static_table_init(void)
 {
 AAC_RENAME(ff_aac_sbr_init)();
 
 ff_aacdec_common_init_once();
-
-#if !USE_FIXED
-ff_aac_float_common_init();
-#else
-#endif
 }
 
 static AVOnce aac_table_init = AV_ONCE_INIT;
@@ -1121,12 +1114,10 @@ static av_cold int aac_decode_init(AVCodecContext 
*avctx)
 ac->avctx = avctx;
 ac->oc[1].m4ac.sample_rate = avctx->sample_rate;
 
-aacdec_init(ac);
-#if USE_FIXED
-avctx->

[FFmpeg-cvslog] aacdec: reuse TNS and LTP tables between fixed and float decoders

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 05:55:13 2024 
+0100| [905fdb06010e554262fca3c12b362bb69a11de85] | committer: Lynne

aacdec: reuse TNS and LTP tables between fixed and float decoders

The fixed decoder derives the values from floats anyway.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=905fdb06010e554262fca3c12b362bb69a11de85
---

 libavcodec/aacdec_fixed.c| 50 
 libavcodec/aacdec_template.c | 14 -
 2 files changed, 9 insertions(+), 55 deletions(-)

diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
index 880c18b0f8..91ec616644 100644
--- a/libavcodec/aacdec_fixed.c
+++ b/libavcodec/aacdec_fixed.c
@@ -79,56 +79,6 @@
 #include 
 #include 
 
-DECLARE_ALIGNED(32, int, AAC_RENAME2(aac_kbd_long_1024))[1024];
-DECLARE_ALIGNED(32, int, AAC_RENAME2(aac_kbd_short_128))[128];
-DECLARE_ALIGNED(32, int, AAC_RENAME2(aac_kbd_long_960))[960];
-DECLARE_ALIGNED(32, int, AAC_RENAME2(aac_kbd_short_120))[120];
-
-/* @name ltp_coef
- * Table of the LTP coefficients
- */
-static const int ltp_coef_fixed[8] = {
-Q30(0.570829), Q30(0.696616), Q30(0.813004), Q30(0.911304),
-Q30(0.984900), Q30(1.067894), Q30(1.194601), Q30(1.369533),
-};
-
-/* @name tns_tmp2_map
- * Tables of the tmp2[] arrays of LPC coefficients used for TNS.
- * The suffix _M_N[] indicate the values of coef_compress and coef_res
- * respectively.
- * @{
- */
-static const int tns_tmp2_map_1_3[4] = {
-Q31(0.), Q31(-0.43388373),  Q31(0.64278758),  Q31(0.34202015),
-};
-
-static const int tns_tmp2_map_0_3[8] = {
-Q31(0.), Q31(-0.43388373), Q31(-0.78183150), Q31(-0.97492790),
-Q31(0.98480773), Q31( 0.86602539), Q31( 0.64278758), Q31( 0.34202015),
-};
-
-static const int tns_tmp2_map_1_4[8] = {
-Q31(0.), Q31(-0.20791170), Q31(-0.40673664), Q31(-0.58778524),
-Q31(0.67369562), Q31( 0.52643216), Q31( 0.36124167), Q31( 0.18374951),
-};
-
-static const int tns_tmp2_map_0_4[16] = {
-Q31( 0.), Q31(-0.20791170), Q31(-0.40673664), Q31(-0.58778524),
-Q31(-0.74314481), Q31(-0.86602539), Q31(-0.95105654), Q31(-0.99452192),
-Q31( 0.99573416), Q31( 0.96182561), Q31( 0.89516330), Q31( 0.79801720),
-Q31( 0.67369562), Q31( 0.52643216), Q31( 0.36124167), Q31( 0.18374951),
-};
-
-static const int * const tns_tmp2_map_fixed[4] = {
-tns_tmp2_map_0_3,
-tns_tmp2_map_0_4,
-tns_tmp2_map_1_3,
-tns_tmp2_map_1_4
-};
-// @}
-
-static const int exp2tab[4] = { Q31(1.00/2), Q31(1.1892071150/2), 
Q31(1.4142135624/2), Q31(1.6817928305/2) };  // 2^0, 2^0.25, 2^0.5, 2^0.75
-
 #include "aacdec_template.c"
 
 const FFCodec ff_aac_fixed_decoder = {
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 1bc36809eb..30ec914520 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1218,13 +1218,17 @@ static int decode_prediction(AACDecContext *ac, 
IndividualChannelStream *ics,
 /**
  * Decode Long Term Prediction data; reference: table 4.xx.
  */
-static void decode_ltp(LongTermPrediction *ltp,
+static void decode_ltp(AACDecContext *ac, LongTermPrediction *ltp,
GetBitContext *gb, uint8_t max_sfb)
 {
 int sfb;
 
 ltp->lag  = get_bits(gb, 11);
-ltp->AAC_RENAME(coef) = AAC_RENAME2(ltp_coef)[get_bits(gb, 3)];
+if (ac->is_fixed)
+ltp->coef_fixed = Q30(ff_ltp_coef[get_bits(gb, 3)]);
+else
+ltp->coef = ff_ltp_coef[get_bits(gb, 3)];
+
 for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
 ltp->used[sfb] = get_bits1(gb);
 }
@@ -1331,7 +1335,7 @@ static int decode_ics_info(AACDecContext *ac, 
IndividualChannelStream *ics,
 goto fail;
 }
 if ((ics->ltp.present = get_bits(gb, 1)))
-decode_ltp(&ics->ltp, gb, ics->max_sfb);
+decode_ltp(ac, &ics->ltp, gb, ics->max_sfb);
 }
 }
 }
@@ -1531,7 +1535,7 @@ static int decode_tns(AACDecContext *ac, 
TemporalNoiseShaping *tns,
 tmp2_idx = 2 * coef_compress + coef_res;
 
 for (i = 0; i < tns->order[w][filt]; i++)
-tns->AAC_RENAME(coef)[w][filt][i] = 
AAC_RENAME2(tns_tmp2_map)[tmp2_idx][get_bits(gb, coef_len)];
+tns->AAC_RENAME(coef)[w][filt][i] = 
Q31(ff_tns_tmp2_map[tmp2_idx][get_bits(gb, coef_len)]);
 }
 }
 }
@@ -1704,7 +1708,7 @@ static int decode_cpe(AACDecContext *ac, GetBitContext 
*gb, ChannelElement *cpe)
 if (cpe->ch[1].ics.predictor_present &&
 (ac->oc[1].m4ac.object_type != AOT_AAC_MAIN))
 if ((cpe->ch[1].ics.ltp.present = get_bits(gb, 1)))
-decode_ltp(&cpe->ch[1].ics.ltp, gb, cpe->ch[1].ics.max_sfb);
+decode_ltp(ac, &cpe->ch[1].ics.ltp, gb, 
cpe->ch[1].ics.max_sfb);
   

[FFmpeg-cvslog] aacdec: move CCE decoding to a separate templated file

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 05:06:58 2024 
+0100| [b1718ce0f98216702e3330335ccf4be3b7896cbb] | committer: Lynne

aacdec: move CCE decoding to a separate templated file

Unfortunately, although it's a purely decode function, it does
need to be templated.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b1718ce0f98216702e3330335ccf4be3b7896cbb
---

 libavcodec/aac/aacdec_float.c |   8 +++
 libavcodec/aac/aacdec_proc_template.c |  85 ++
 libavcodec/aacdec.h   |   7 +++
 libavcodec/aacdec_template.c  | 109 +++---
 4 files changed, 109 insertions(+), 100 deletions(-)

diff --git a/libavcodec/aac/aacdec_float.c b/libavcodec/aac/aacdec_float.c
index 865a57deb1..bbb4aec0a2 100644
--- a/libavcodec/aac/aacdec_float.c
+++ b/libavcodec/aac/aacdec_float.c
@@ -39,6 +39,7 @@
 #include "libavcodec/sinewin.h"
 #include "libavcodec/kbdwin.h"
 #include "libavcodec/cbrt_data.h"
+#include "libavutil/mathematics.h"
 
 DECLARE_ALIGNED(32, static float, sine_120)[120];
 DECLARE_ALIGNED(32, static float, sine_960)[960];
@@ -66,6 +67,13 @@ static void init_tables(void)
 ff_thread_once(&init_float_once, init_tables_float_fn);
 }
 
+static const float cce_scale[] = {
+1.09050773266525765921, //2^(1/8)
+1.18920711500272106672, //2^(1/4)
+M_SQRT2,
+2,
+};
+
 /** Dequantization-related **/
 #include "aacdec_tab.h"
 #include "libavutil/intfloat.h"
diff --git a/libavcodec/aac/aacdec_proc_template.c 
b/libavcodec/aac/aacdec_proc_template.c
index c3d607b4d3..609aa2fcc5 100644
--- a/libavcodec/aac/aacdec_proc_template.c
+++ b/libavcodec/aac/aacdec_proc_template.c
@@ -349,6 +349,91 @@ static int 
AAC_RENAME(decode_spectrum_and_dequant)(AACDecContext *ac,
 return 0;
 }
 
+/**
+ * Decode coupling_channel_element; reference: table 4.8.
+ *
+ * @return  Returns error status. 0 - OK, !0 - error
+ */
+static int AAC_RENAME(decode_cce)(AACDecContext *ac, GetBitContext *gb, 
ChannelElement *che)
+{
+int num_gain = 0;
+int c, g, sfb, ret;
+int sign;
+INTFLOAT scale;
+SingleChannelElement *sce = &che->ch[0];
+ChannelCoupling *coup = &che->coup;
+
+coup->coupling_point = 2 * get_bits1(gb);
+coup->num_coupled = get_bits(gb, 3);
+for (c = 0; c <= coup->num_coupled; c++) {
+num_gain++;
+coup->type[c] = get_bits1(gb) ? TYPE_CPE : TYPE_SCE;
+coup->id_select[c] = get_bits(gb, 4);
+if (coup->type[c] == TYPE_CPE) {
+coup->ch_select[c] = get_bits(gb, 2);
+if (coup->ch_select[c] == 3)
+num_gain++;
+} else
+coup->ch_select[c] = 2;
+}
+coup->coupling_point += get_bits1(gb) || (coup->coupling_point >> 1);
+
+sign  = get_bits(gb, 1);
+#if USE_FIXED
+scale = get_bits(gb, 2);
+#else
+scale = cce_scale[get_bits(gb, 2)];
+#endif
+
+if ((ret = AAC_RENAME(ff_aac_decode_ics)(ac, sce, gb, 0, 0)))
+return ret;
+
+for (c = 0; c < num_gain; c++) {
+int idx  = 0;
+int cge  = 1;
+int gain = 0;
+INTFLOAT gain_cache = FIXR10(1.);
+if (c) {
+cge = coup->coupling_point == AFTER_IMDCT ? 1 : get_bits1(gb);
+gain = cge ? get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - 60: 0;
+gain_cache = GET_GAIN(scale, gain);
+#if USE_FIXED
+if ((abs(gain_cache)-1024) >> 3 > 30)
+return AVERROR(ERANGE);
+#endif
+}
+if (coup->coupling_point == AFTER_IMDCT) {
+coup->gain[c][0] = gain_cache;
+} else {
+for (g = 0; g < sce->ics.num_window_groups; g++) {
+for (sfb = 0; sfb < sce->ics.max_sfb; sfb++, idx++) {
+if (sce->band_type[idx] != ZERO_BT) {
+if (!cge) {
+int t = get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - 
60;
+if (t) {
+int s = 1;
+t = gain += t;
+if (sign) {
+s  -= 2 * (t & 0x1);
+t >>= 1;
+}
+gain_cache = GET_GAIN(scale, t) * s;
+#if USE_FIXED
+if ((abs(gain_cache)-1024) >> 3 > 30)
+return AVERROR(ERANGE);
+#endif
+}
+}
+coup->gain[c][idx] = gain_cache;
+}
+}
+}
+}
+}
+return 0;
+}
+
 const AACDecProc AAC_RENAME(aac_proc) = {
 .decode_spectrum_and_dequant = AAC_RENAME(decode_spectrum_and_dequant),
+.decode_cce = AAC_RENAME(decode_cce),
 };
diff --g

[FFmpeg-cvslog] aacdec: move fixed-point clipping to a separate function

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 04:29:07 2024 
+0100| [41c04bec0ad306cf6d0b9af19f904a7c86582bdf] | committer: Lynne

aacdec: move fixed-point clipping to a separate function

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=41c04bec0ad306cf6d0b9af19f904a7c86582bdf
---

 libavcodec/aac/aacdec_dsp_template.c | 17 +
 libavcodec/aacdec.h  |  2 ++
 libavcodec/aacdec_template.c | 15 +--
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index a72ccf891c..adcafa10e7 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -554,6 +554,21 @@ static void 
AAC_RENAME(imdct_and_windowing_eld)(AACDecContext *ac, SingleChannel
 memcpy( saved,   buf, n * sizeof(*saved));
 }
 
+static void AAC_RENAME(clip_output)(AACDecContext *ac, ChannelElement *che,
+int type, int samples)
+{
+#if USE_FIXED
+/* preparation for resampler */
+for (int j = 0; j < samples; j++){
+che->ch[0].output_fixed[j] = 
(int32_t)av_clip64((int64_t)che->ch[0].output_fixed[j]*128,
+INT32_MIN, 
INT32_MAX-0x8000)+0x8000;
+if (type == TYPE_CPE || (type == TYPE_SCE && ac->oc[1].m4ac.ps == 1))
+che->ch[1].output_fixed[j] = 
(int32_t)av_clip64((int64_t)che->ch[1].output_fixed[j]*128,
+INT32_MIN, 
INT32_MAX-0x8000)+0x8000;
+}
+#endif
+}
+
 const AACDecDSP AAC_RENAME(aac_dsp) = {
 .init_tables = &AAC_RENAME(init_tables),
 
@@ -571,4 +586,6 @@ const AACDecDSP AAC_RENAME(aac_dsp) = {
 
 .apply_dependent_coupling = AAC_RENAME(apply_dependent_coupling),
 .apply_independent_coupling = AAC_RENAME(apply_independent_coupling),
+
+.clip_output = AAC_RENAME(clip_output),
 };
diff --git a/libavcodec/aacdec.h b/libavcodec/aacdec.h
index 91ffb877da..c8bcae4c4f 100644
--- a/libavcodec/aacdec.h
+++ b/libavcodec/aacdec.h
@@ -239,6 +239,8 @@ typedef struct AACDecDSP {
 void (*imdct_and_windowing_960)(AACDecContext *ac, SingleChannelElement 
*sce);
 void (*imdct_and_windowing_ld)(AACDecContext *ac, SingleChannelElement 
*sce);
 void (*imdct_and_windowing_eld)(AACDecContext *ac, SingleChannelElement 
*sce);
+
+void (*clip_output)(AACDecContext *ac, ChannelElement *che, int type, int 
samples);
 } AACDecDSP;
 
 /**
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 3c77b41694..d56801fc66 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -2129,20 +2129,7 @@ static void spectral_to_sample(AACDecContext *ac, int 
samples)
 }
 if (type <= TYPE_CCE)
 apply_channel_coupling(ac, che, type, i, AFTER_IMDCT, 
ac->dsp.apply_independent_coupling);
-
-#if USE_FIXED
-{
-int j;
-/* preparation for resampler */
-for(j = 0; j<samples; j++){
-che->ch[0].output_fixed[j] = 
(int32_t)av_clip64((int64_t)che->ch[0].output_fixed[j]*128,
-INT32_MIN, 
INT32_MAX-0x8000)+0x8000;
-if (type == TYPE_CPE || (type == TYPE_SCE && 
ac->oc[1].m4ac.ps == 1))
-che->ch[1].output_fixed[j] = 
(int32_t)av_clip64((int64_t)che->ch[1].output_fixed[j]*128,
-
INT32_MIN, INT32_MAX-0x8000)+0x8000;
-}
-}
-#endif /* USE_FIXED */
+ac->dsp.clip_output(ac, che, type, samples);
 che->present = 0;
 } else if (che) {
 av_log(ac->avctx, AV_LOG_VERBOSE, "ChannelElement %d.%d 
missing \n", type, i);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] aacdec: split off channel coupling into a new file

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 04:17:30 2024 
+0100| [87a93a5670dde2dc0087e275912be07c9310865f] | committer: Lynne

aacdec: split off channel coupling into a new file

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=87a93a5670dde2dc0087e275912be07c9310865f
---

 libavcodec/aac/aacdec_dsp_template.c   |   3 +
 libavcodec/aac/aacdec_fixed.c  |  12 +++
 libavcodec/aac/aacdec_fixed_coupling.h | 137 +
 libavcodec/aac/aacdec_float.c  |   1 +
 libavcodec/aac/aacdec_float_coupling.h |  90 ++
 libavcodec/aacdec.c|  53 -
 libavcodec/aacdec.h|   7 ++
 libavcodec/aacdec_fixed.c  | 112 ---
 libavcodec/aacdec_template.c   |   6 +-
 9 files changed, 253 insertions(+), 168 deletions(-)

diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index a04d358883..a72ccf891c 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -568,4 +568,7 @@ const AACDecDSP AAC_RENAME(aac_dsp) = {
 .imdct_and_windowing_960 = AAC_RENAME(imdct_and_windowing_960),
 .imdct_and_windowing_ld = AAC_RENAME(imdct_and_windowing_ld),
 .imdct_and_windowing_eld = AAC_RENAME(imdct_and_windowing_eld),
+
+.apply_dependent_coupling = AAC_RENAME(apply_dependent_coupling),
+.apply_independent_coupling = AAC_RENAME(apply_independent_coupling),
 };
diff --git a/libavcodec/aac/aacdec_fixed.c b/libavcodec/aac/aacdec_fixed.c
index 08e9c3a03f..dc3e8eaab6 100644
--- a/libavcodec/aac/aacdec_fixed.c
+++ b/libavcodec/aac/aacdec_fixed.c
@@ -64,8 +64,20 @@ static void init_tables_fixed(void)
 ff_thread_once(&init_fixed_once, init_tables_fixed_fn);
 }
 
+static const int cce_scale_fixed[8] = {
+Q30(1.0),  //2^(0/8)
+Q30(1.0905077327), //2^(1/8)
+Q30(1.1892071150), //2^(2/8)
+Q30(1.2968395547), //2^(3/8)
+Q30(1.4142135624), //2^(4/8)
+Q30(1.5422108254), //2^(5/8)
+Q30(1.6817928305), //2^(6/8)
+Q30(1.8340080864), //2^(7/8)
+};
+
 /** Dequantization-related */
 #include "aacdec_fixed_dequant.h"
 
+#include "aacdec_fixed_coupling.h"
 #include "aacdec_dsp_template.c"
 #include "aacdec_proc_template.c"
diff --git a/libavcodec/aac/aacdec_fixed_coupling.h 
b/libavcodec/aac/aacdec_fixed_coupling.h
new file mode 100644
index 0000000000..b6a54b35bd
--- /dev/null
+++ b/libavcodec/aac/aacdec_fixed_coupling.h
@@ -0,0 +1,137 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse 
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall 
+ * Copyright (c) 2010  Janne Grunau 
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ *  MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_FIXED_COUPLING_H
+#define AVCODEC_AAC_AACDEC_FIXED_COUPLING_H
+
+#include "libavcodec/aacdec.h"
+
+/**
+ * Apply dependent channel coupling (applied before IMDCT).
+ *
+ * @param   index   index into coupling gain array
+ */
+static void AAC_RENAME(apply_dependent_coupling)(AACDecContext *ac,
+ SingleChannelElement *target,
+ ChannelElement *cce, int 
index)
+{
+IndividualChannelStream *ics = &cce->ch[0].ics;
+const uint16_t *offsets = ics->swb_offset;
+int *dest = target->coeffs_fixed;
+const int *src = cce->ch[0].coeffs_fixed;
+int g, i, group, k, idx = 0;
+if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
+av_log(ac->avctx, AV_LOG_ERROR,
+   "Dependent coupling is not supported together with LTP\n");
+return;
+}
+for (g = 0; g < ics->num_window_groups; g++) {
+for (i = 0; i < ics->max_sfb; i++, idx++) {
+if (cce->ch[0].band_type[idx] != ZERO_BT) {
+const int gain = cce->coup.gain[index][idx];
+int shift, ro

[FFmpeg-cvslog] aacdec: move spectrum decode and dequantization to a new file

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 02:43:33 2024 
+0100| [41ae2b03a5cf87f9673f82efbc9cea53df70f150] | committer: Lynne

aacdec: move spectrum decode and dequantization to a new file

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=41ae2b03a5cf87f9673f82efbc9cea53df70f150
---

 libavcodec/aac/aacdec.c   |   4 +
 libavcodec/aac/aacdec_dsp_template.c  |   2 +-
 libavcodec/aac/aacdec_fixed.c |   5 +
 libavcodec/aac/aacdec_fixed_dequant.h | 174 +
 libavcodec/aac/aacdec_float.c |  74 +++
 libavcodec/aac/aacdec_proc_template.c | 354 ++
 libavcodec/aacdec.c   |  68 ---
 libavcodec/aacdec.h   |  15 +-
 libavcodec/aacdec_fixed.c | 130 -
 libavcodec/aacdec_template.c  | 331 +--
 10 files changed, 626 insertions(+), 531 deletions(-)

diff --git a/libavcodec/aac/aacdec.c b/libavcodec/aac/aacdec.c
index 26612f4a14..d31c64d08d 100644
--- a/libavcodec/aac/aacdec.c
+++ b/libavcodec/aac/aacdec.c
@@ -48,6 +48,9 @@
 extern const AACDecDSP aac_dsp;
 extern const AACDecDSP aac_dsp_fixed;
 
+extern const AACDecProc aac_proc;
+extern const AACDecProc aac_proc_fixed;
+
 av_cold int ff_aac_decode_close(AVCodecContext *avctx)
 {
 AACDecContext *ac = avctx->priv_data;
@@ -119,6 +122,7 @@ av_cold int ff_aac_decode_init_common(AVCodecContext *avctx)
 return ret;
 
 ac->dsp = is_fixed ? aac_dsp_fixed : aac_dsp;
+ac->proc = is_fixed ? aac_proc_fixed : aac_proc;
 
 ac->dsp.init_tables();
 
diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index 56c51c3e07..a04d358883 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -151,7 +151,7 @@ static void 
AAC_RENAME(apply_intensity_stereo)(AACDecContext *ac,
 scale = c * sce1->AAC_RENAME(sf)[idx];
 for (group = 0; group < ics->group_len[g]; group++)
 #if USE_FIXED
-ac->subband_scale(coef1 + group * 128 + offsets[i],
+subband_scale(coef1 + group * 128 + offsets[i],
   coef0 + group * 128 + offsets[i],
   scale,
   23,
diff --git a/libavcodec/aac/aacdec_fixed.c b/libavcodec/aac/aacdec_fixed.c
index 9dd8f34f55..a2ddc5aac2 100644
--- a/libavcodec/aac/aacdec_fixed.c
+++ b/libavcodec/aac/aacdec_fixed.c
@@ -38,6 +38,7 @@
 #include "libavcodec/aactab.h"
 #include "libavcodec/sinewin_fixed_tablegen.h"
 #include "libavcodec/kbdwin.h"
+#include "libavcodec/cbrt_data.h"
 
 DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME2(aac_kbd_long_1024))[1024];
 DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME2(aac_kbd_short_128))[128];
@@ -61,4 +62,8 @@ static void init_tables_fixed(void)
 ff_thread_once(&init_fixed_once, init_tables_fixed_fn);
 }
 
+/** Dequantization-related */
+#include "aacdec_fixed_dequant.h"
+
 #include "aacdec_dsp_template.c"
+#include "aacdec_proc_template.c"
diff --git a/libavcodec/aac/aacdec_fixed_dequant.h 
b/libavcodec/aac/aacdec_fixed_dequant.h
new file mode 100644
index 0000000000..5fb84fbed0
--- /dev/null
+++ b/libavcodec/aac/aacdec_fixed_dequant.h
@@ -0,0 +1,174 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse 
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall 
+ * Copyright (c) 2010  Janne Grunau 
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ *  MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_AACDEC_FIXED_DEQUANT_H
+#define AVCODEC_AAC_AACDEC_FIXED_DEQUANT_H
+
+#include "aacdec_tab.h"
+
+static void inline vector_pow43(int *coefs, int len)
+{
+int i, coef;
+
+for (i=0; i> 2);
+
+if (s > 31) {
+for (i=0; i 0) {
+round = 1 << (s-1);
+for (i=0; i> 32);
+  

[FFmpeg-cvslog] aacdec: deduplicate table initialization

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 04:03:12 2024 
+0100| [b7387ea00e8346022877f2f082ce2d2bc7a217e5] | committer: Lynne

aacdec: deduplicate table initialization

All tables now initialized by aac/aacdec_fixed|float

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b7387ea00e8346022877f2f082ce2d2bc7a217e5
---

 libavcodec/aac/aacdec_fixed.c |  2 ++
 libavcodec/aac/aacdec_float.c |  2 ++
 libavcodec/aacdec.c   |  5 -
 libavcodec/aacdec_fixed.c |  2 --
 libavcodec/aacdec_template.c  | 12 
 5 files changed, 4 insertions(+), 19 deletions(-)

diff --git a/libavcodec/aac/aacdec_fixed.c b/libavcodec/aac/aacdec_fixed.c
index a2ddc5aac2..08e9c3a03f 100644
--- a/libavcodec/aac/aacdec_fixed.c
+++ b/libavcodec/aac/aacdec_fixed.c
@@ -47,6 +47,8 @@ DECLARE_ALIGNED(32, static INTFLOAT, 
AAC_RENAME(aac_kbd_short_120))[120];
 
 static void init_tables_fixed_fn(void)
 {
+AAC_RENAME(ff_cbrt_tableinit)();
+
 AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_long_1024), 4.0, 1024);
 AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_short_128), 6.0, 128);
 
diff --git a/libavcodec/aac/aacdec_float.c b/libavcodec/aac/aacdec_float.c
index 355980d169..d1cc5ce929 100644
--- a/libavcodec/aac/aacdec_float.c
+++ b/libavcodec/aac/aacdec_float.c
@@ -47,6 +47,8 @@ DECLARE_ALIGNED(32, static float, aac_kbd_short_120)[120];
 
 static void init_tables_float_fn(void)
 {
+AAC_RENAME(ff_cbrt_tableinit)();
+
 AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_long_1024), 4.0, 1024);
 AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_short_128), 6.0, 128);
 
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index 9642c45015..13d53c6cfc 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -63,11 +63,6 @@
 #   include "mips/aacdec_mips.h"
 #endif
 
-DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_120))[120];
-DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_960))[960];
-DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_long_960))[960];
-DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_short_120))[120];
-
 static av_always_inline void reset_predict_state(PredictorState *ps)
 {
 ps->r0   = 0.0f;
diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
index efc666a6ce..f1eb072103 100644
--- a/libavcodec/aacdec_fixed.c
+++ b/libavcodec/aacdec_fixed.c
@@ -64,8 +64,6 @@
 #include "avcodec.h"
 #include "codec_internal.h"
 #include "get_bits.h"
-#include "kbdwin.h"
-#include "sinewin_fixed_tablegen.h"
 
 #include "aac.h"
 #include "aacdec.h"
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 167e349b3e..c3ab1ed4c6 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1110,22 +1110,10 @@ static av_cold void aac_static_table_init(void)
 
 ff_aacdec_common_init_once();
 
-// window initialization
-AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_long_960), 4.0, 960);
-AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_short_120), 6.0, 120);
-
 #if !USE_FIXED
-AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_960), 960);
-AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_120), 120);
-AAC_RENAME(ff_init_ff_sine_windows)(9);
 ff_aac_float_common_init();
 #else
-AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_long_1024), 4.0, 1024);
-AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_short_128), 6.0, 128);
-init_sine_windows_fixed();
 #endif
-
-AAC_RENAME(ff_cbrt_tableinit)();
 }
 
 static AVOnce aac_table_init = AV_ONCE_INIT;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] aacdec: template LTP application separately

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Thu Mar 14 04:52:28 2024 
+0100| [e9fc7661daac9c0df0747e11435570899652d686] | committer: Lynne

aacdec: template LTP application separately

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e9fc7661daac9c0df0747e11435570899652d686
---

 libavcodec/aac/aacdec_dsp_template.c | 76 -
 libavcodec/aac/aacdec_fixed.c|  8 
 libavcodec/aac/aacdec_float.c|  5 +++
 libavcodec/aacdec.h  |  2 -
 libavcodec/aacdec_fixed.c|  8 ++--
 libavcodec/aacdec_template.c | 83 ++--
 libavcodec/mips/aacdec_mips.c|  2 +-
 7 files changed, 97 insertions(+), 87 deletions(-)

diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index da7f5fac4f..5e18b30d99 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -30,7 +30,6 @@
  */
 
 #include "libavcodec/aacdec.h"
-#include "libavcodec/aac_defines.h"
 #include "libavcodec/lpc_functions.h"
 
 #include "libavcodec/aactab.h"
@@ -238,9 +237,84 @@ static void AAC_RENAME(apply_tns)(void *_coef_param, 
TemporalNoiseShaping *tns,
 }
 }
 
+/**
+ * Apply the long term prediction
+ */
+static void AAC_RENAME(apply_ltp)(AACDecContext *ac, SingleChannelElement *sce)
+{
+const LongTermPrediction *ltp = &sce->ics.ltp;
+const uint16_t *offsets = sce->ics.swb_offset;
+int i, sfb;
+
+if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
+INTFLOAT *predTime = sce->AAC_RENAME(output);
+INTFLOAT *predFreq = ac->AAC_RENAME(buf_mdct);
+int16_t num_samples = 2048;
+
+if (ltp->lag < 1024)
+num_samples = ltp->lag + 1024;
+for (i = 0; i < num_samples; i++)
+predTime[i] = AAC_MUL30(sce->AAC_RENAME(ltp_state)[i + 2048 - 
ltp->lag], ltp->AAC_RENAME(coef));
+memset(&predTime[i], 0, (2048 - i) * sizeof(*predTime));
+
+ac->AAC_RENAME(windowing_and_mdct_ltp)(ac, predFreq, predTime, 
&sce->ics);
+
+if (sce->tns.present)
+AAC_RENAME(apply_tns)(predFreq, &sce->tns, &sce->ics, 0);
+
+for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
+if (ltp->used[sfb])
+for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
+sce->AAC_RENAME(coeffs)[i] += (UINTFLOAT)predFreq[i];
+}
+}
+
+/**
+ * Update the LTP buffer for next frame
+ */
+static void AAC_RENAME(update_ltp)(AACDecContext *ac, SingleChannelElement 
*sce)
+{
+IndividualChannelStream *ics = &sce->ics;
+INTFLOAT *saved = sce->AAC_RENAME(saved);
+INTFLOAT *saved_ltp = sce->AAC_RENAME(coeffs);
+const INTFLOAT *lwindow = ics->use_kb_window[0] ? 
AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
+const INTFLOAT *swindow = ics->use_kb_window[0] ? 
AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
+int i;
+
+if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+memcpy(saved_ltp,   saved, 512 * sizeof(*saved_ltp));
+memset(saved_ltp + 576, 0, 448 * sizeof(*saved_ltp));
+ac->fdsp->vector_fmul_reverse(saved_ltp + 448, 
ac->AAC_RENAME(buf_mdct) + 960, &swindow[64],  64);
+
+for (i = 0; i < 64; i++)
+saved_ltp[i + 512] = AAC_MUL31(ac->AAC_RENAME(buf_mdct)[1023 - i], 
swindow[63 - i]);
+} else if (1 && ics->window_sequence[0] == LONG_START_SEQUENCE) {
+memcpy(saved_ltp,   ac->AAC_RENAME(buf_mdct) + 512, 448 * 
sizeof(*saved_ltp));
+memset(saved_ltp + 576, 0,  448 * sizeof(*saved_ltp));
+ac->fdsp->vector_fmul_reverse(saved_ltp + 448, 
ac->AAC_RENAME(buf_mdct) + 960, &swindow[64],  64);
+
+for (i = 0; i < 64; i++)
+saved_ltp[i + 512] = AAC_MUL31(ac->AAC_RENAME(buf_mdct)[1023 - i], 
swindow[63 - i]);
+} else if (1) { // LONG_STOP or ONLY_LONG
+ac->fdsp->vector_fmul_reverse(saved_ltp, ac->AAC_RENAME(buf_mdct) + 
512, &lwindow[512], 512);
+
+for (i = 0; i < 512; i++)
+saved_ltp[i + 512] = AAC_MUL31(ac->AAC_RENAME(buf_mdct)[1023 - i], 
lwindow[511 - i]);
+}
+
+memcpy(sce->AAC_RENAME(ltp_state),  sce->AAC_RENAME(ltp_state)+1024,
+   1024 * sizeof(*sce->AAC_RENAME(ltp_state)));
+memcpy(sce->AAC_RENAME(ltp_state) + 1024, sce->AAC_RENAME(output),
+   1024 * sizeof(*sce->AAC_RENAME(ltp_state)));
+memcpy(sce->AAC_RENAME(ltp_state) + 2048, saved_ltp,
+   1024 * sizeof(*sce->AAC_RENAME(ltp_state)));
+}
+
 const AACDecDSP AAC_RENAME(aac_dsp) = {
 .dequant_scalefactors = &AAC_RENAME(dequant_scalefactors),
 .apply_mid_side_stereo = &AAC_RENAME(apply_mid_side_stereo),
 .apply_intensity_stereo = &AAC_RENAME(apply_intensity_stereo),
 .apply_tns = &AAC_R

[FFmpeg-cvslog] aacdec: template windowing and transforms separately

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 00:14:32 2024 
+0100| [091d85217d82d5ee0f96b4dbc665cb3c598a0451] | committer: Lynne

aacdec: template windowing and transforms separately

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=091d85217d82d5ee0f96b4dbc665cb3c598a0451
---

 libavcodec/aac/aacdec_dsp_template.c | 221 ++
 libavcodec/aacdec.h  |   1 -
 libavcodec/aacdec_template.c | 225 +--
 libavcodec/mips/aacdec_mips.c|   2 +-
 4 files changed, 226 insertions(+), 223 deletions(-)

diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index 30151604c2..c607383d67 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -310,6 +310,222 @@ static void AAC_RENAME(update_ltp)(AACDecContext *ac, 
SingleChannelElement *sce)
1024 * sizeof(*sce->AAC_RENAME(ltp_state)));
 }
 
+/**
+ * Conduct IMDCT and windowing.
+ */
+static void AAC_RENAME(imdct_and_windowing)(AACDecContext *ac, 
SingleChannelElement *sce)
+{
+IndividualChannelStream *ics = &sce->ics;
+INTFLOAT *in= sce->AAC_RENAME(coeffs);
+INTFLOAT *out   = sce->AAC_RENAME(output);
+INTFLOAT *saved = sce->AAC_RENAME(saved);
+const INTFLOAT *swindow  = ics->use_kb_window[0] ? 
AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
+const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? 
AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
+const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? 
AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
+INTFLOAT *buf  = ac->AAC_RENAME(buf_mdct);
+INTFLOAT *temp = ac->AAC_RENAME(temp);
+int i;
+
+// imdct
+if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+for (i = 0; i < 1024; i += 128)
+ac->mdct128_fn(ac->mdct128, buf + i, in + i, sizeof(INTFLOAT));
+} else {
+ac->mdct1024_fn(ac->mdct1024, buf, in, sizeof(INTFLOAT));
+}
+
+/* window overlapping
+ * NOTE: To simplify the overlapping code, all 'meaningless' short to long
+ * and long to short transitions are considered to be short to short
+ * transitions. This leaves just two cases (long to long and short to 
short)
+ * with a little special sauce for EIGHT_SHORT_SEQUENCE.
+ */
+if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || 
ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
+(ics->window_sequence[0] == ONLY_LONG_SEQUENCE || 
ics->window_sequence[0] == LONG_START_SEQUENCE)) {
+ac->fdsp->vector_fmul_window(out,   saved,
buf, lwindow_prev, 512);
+} else {
+memcpy( out,   saved,
448 * sizeof(*out));
+
+if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448,  
buf + 0*128, swindow_prev, 64);
+ac->fdsp->vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, 
buf + 1*128, swindow,  64);
+ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, 
buf + 2*128, swindow,  64);
+ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, 
buf + 3*128, swindow,  64);
+ac->fdsp->vector_fmul_window(temp,  buf + 3*128 + 64, 
buf + 4*128, swindow,  64);
+memcpy( out + 448 + 4*128, temp, 64 * 
sizeof(*out));
+} else {
+ac->fdsp->vector_fmul_window(out + 448, saved + 448,  
buf, swindow_prev, 64);
+memcpy( out + 576, buf + 64, 
448 * sizeof(*out));
+}
+}
+
+// buffer update
+if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
+memcpy( saved,   temp + 64, 64 * 
sizeof(*saved));
+ac->fdsp->vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 
5*128, swindow, 64);
+ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 
6*128, swindow, 64);
+ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 
7*128, swindow, 64);
+memcpy( saved + 448, buf + 7*128 + 64,  64 * 
sizeof(*saved));
+} else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
+memcpy( saved,   buf + 512,448 * 
sizeof(*saved));
+memcpy( saved + 448, buf + 7*128 + 64,  64 * 
sizeof(*saved));
+} else { // LONG_STOP or ONLY_LONG
+memcpy( saved,   buf + 512,512 * 
sizeof(*saved));
+}
+}
+
+/**
+ * Conduct IMDCT and windowing.
+ */
+static void AAC_RENAME(imdct_and_windowing_960)(AACDecContext *ac, 
Single

[FFmpeg-cvslog] aacdec: remove unnecessary decode_spectrum_and_dequant arguments

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 02:19:35 2024 
+0100| [5c026e66372f31c7bdeccc45511c9f5eeac22865] | committer: Lynne

aacdec: remove unnecessary decode_spectrum_and_dequant arguments

Small cleanup to reduce number of arguments.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5c026e66372f31c7bdeccc45511c9f5eeac22865
---

 libavcodec/aacdec_template.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index a8fb8606e1..01ae847264 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1610,12 +1610,13 @@ static void decode_mid_side_stereo(ChannelElement *cpe, 
GetBitContext *gb,
  *
  * @return  Returns error status. 0 - OK, !0 - error
  */
-static int decode_spectrum_and_dequant(AACDecContext *ac, INTFLOAT coef[1024],
+static int decode_spectrum_and_dequant(AACDecContext *ac,
GetBitContext *gb,
-   int pulse_present, const Pulse *pulse,
+   const Pulse *pulse,
SingleChannelElement *sce)
 {
 int i, k, g, idx = 0;
+INTFLOAT *coef = sce->AAC_RENAME(coeffs);
 IndividualChannelStream *ics = &sce->ics;
 const int c = 1024 / ics->num_windows;
 const uint16_t *offsets = ics->swb_offset;
@@ -1856,7 +1857,7 @@ static int decode_spectrum_and_dequant(AACDecContext *ac, 
INTFLOAT coef[1024],
 coef += g_len << 7;
 }
 
-if (pulse_present) {
+if (pulse) {
 idx = 0;
 for (i = 0; i < pulse->num_pulse; i++) {
 INTFLOAT co = coef_base[ pulse->pos[i] ];
@@ -1977,7 +1978,6 @@ static int decode_ics(AACDecContext *ac, 
SingleChannelElement *sce,
 Pulse pulse;
 TemporalNoiseShaping *tns = &sce->tns;
 IndividualChannelStream *ics = &sce->ics;
-INTFLOAT *out = sce->AAC_RENAME(coeffs);
 int global_gain, eld_syntax, er_syntax, pulse_present = 0;
 int ret;
 
@@ -2047,9 +2047,9 @@ static int decode_ics(AACDecContext *ac, 
SingleChannelElement *sce,
 }
 }
 
-ret = decode_spectrum_and_dequant(ac, out, gb,
-  pulse_present,
-  &pulse, sce);
+ret = decode_spectrum_and_dequant(ac, gb,
+  pulse_present ? &pulse : NULL,
+  sce);
 if (ret < 0)
 goto fail;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] aacdec: template LTP windowing separately

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 01:28:41 2024 
+0100| [eef9100a8e57fe9d0642aeb69c82a5abfc01e962] | committer: Lynne

aacdec: template LTP windowing separately

The function is called only internally in DSP, so we do not
need to expose it.

apply_ltp on MIPS uses this function, but due to the function
being just a glue function with no real optimizations,
duplicate it there.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=eef9100a8e57fe9d0642aeb69c82a5abfc01e962
---

 libavcodec/aac/aacdec_dsp_template.c | 30 +-
 libavcodec/aacdec.h  |  6 --
 libavcodec/aacdec_template.c | 28 
 libavcodec/mips/aacdec_mips.c| 30 +-
 4 files changed, 58 insertions(+), 36 deletions(-)

diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index c607383d67..56c51c3e07 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -237,6 +237,34 @@ static void AAC_RENAME(apply_tns)(void *_coef_param, 
TemporalNoiseShaping *tns,
 }
 }
 
+/**
+ *  Apply windowing and MDCT to obtain the spectral
+ *  coefficient from the predicted sample by LTP.
+ */
+static inline void AAC_RENAME(windowing_and_mdct_ltp)(AACDecContext *ac,
+  INTFLOAT *out, INTFLOAT 
*in,
+  IndividualChannelStream 
*ics)
+{
+const INTFLOAT *lwindow  = ics->use_kb_window[0] ? 
AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
+const INTFLOAT *swindow  = ics->use_kb_window[0] ? 
AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
+const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? 
AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
+const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? 
AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
+
+if (ics->window_sequence[0] != LONG_STOP_SEQUENCE) {
+ac->fdsp->vector_fmul(in, in, lwindow_prev, 1024);
+} else {
+memset(in, 0, 448 * sizeof(*in));
+ac->fdsp->vector_fmul(in + 448, in + 448, swindow_prev, 128);
+}
+if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
+ac->fdsp->vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
+} else {
+ac->fdsp->vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, 
swindow, 128);
+memset(in + 1024 + 576, 0, 448 * sizeof(*in));
+}
+ac->mdct_ltp_fn(ac->mdct_ltp, out, in, sizeof(INTFLOAT));
+}
+
 /**
  * Apply the long term prediction
  */
@@ -257,7 +285,7 @@ static void AAC_RENAME(apply_ltp)(AACDecContext *ac, 
SingleChannelElement *sce)
 predTime[i] = AAC_MUL30(sce->AAC_RENAME(ltp_state)[i + 2048 - 
ltp->lag], ltp->AAC_RENAME(coef));
 memset(&predTime[i], 0, (2048 - i) * sizeof(*predTime));
 
-ac->AAC_RENAME(windowing_and_mdct_ltp)(ac, predFreq, predTime, 
&sce->ics);
+AAC_RENAME(windowing_and_mdct_ltp)(ac, predFreq, predTime, &sce->ics);
 
 if (sce->tns.present)
 AAC_RENAME(apply_tns)(predFreq, &sce->tns, &sce->ics, 0);
diff --git a/libavcodec/aacdec.h b/libavcodec/aacdec.h
index a0f8790e17..87462adb02 100644
--- a/libavcodec/aacdec.h
+++ b/libavcodec/aacdec.h
@@ -311,12 +311,6 @@ struct AACDecContext {
 int is_fixed;
 
 /* aacdec functions pointers */
-union {
-void (*windowing_and_mdct_ltp)(struct AACDecContext *ac, float *out,
-   float *in, IndividualChannelStream 
*ics);
-void (*windowing_and_mdct_ltp_fixed)(struct AACDecContext *ac, int 
*out,
- int *in, IndividualChannelStream 
*ics);
-};
 void (*vector_pow43)(int *coefs, int len);
 void (*subband_scale)(int *dst, int *src, int scale, int offset, int len, 
void *log_context);
 };
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 7985a213eb..a8fb8606e1 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -2367,33 +2367,6 @@ static int decode_extension_payload(AACDecContext *ac, 
GetBitContext *gb, int cn
 return res;
 }
 
-/**
- *  Apply windowing and MDCT to obtain the spectral
- *  coefficient from the predicted sample by LTP.
- */
-static void windowing_and_mdct_ltp(AACDecContext *ac, INTFLOAT *out,
-   INTFLOAT *in, IndividualChannelStream *ics)
-{
-const INTFLOAT *lwindow  = ics->use_kb_window[0] ? 
AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
-const INTFLOAT *swindow  = ics->use_kb_window[0] ? 
AAC_RENAME2(aac_kbd_short_128) : AAC_RENAME2(sine_128);
-const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? 
AAC_RENAME2(aac_kbd_long_1024) : AAC_RENAME2(sine_1024);
-const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? 

[FFmpeg-cvslog] aacdec: duplicate table initialization

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Mar 16 00:12:56 2024 
+0100| [a309aa412755f7bb16eead43bcaff1a9dcdb5954] | committer: Lynne

aacdec: duplicate table initialization

Preparation to move all table init and support windowing functions.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a309aa412755f7bb16eead43bcaff1a9dcdb5954
---

 libavcodec/aac/aacdec.c  |  2 ++
 libavcodec/aac/aacdec_dsp_template.c |  2 ++
 libavcodec/aac/aacdec_fixed.c| 26 --
 libavcodec/aac/aacdec_float.c| 27 +++
 libavcodec/aacdec.h  |  2 ++
 5 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aac/aacdec.c b/libavcodec/aac/aacdec.c
index 00353bddc7..26612f4a14 100644
--- a/libavcodec/aac/aacdec.c
+++ b/libavcodec/aac/aacdec.c
@@ -120,6 +120,8 @@ av_cold int ff_aac_decode_init_common(AVCodecContext *avctx)
 
 ac->dsp = is_fixed ? aac_dsp_fixed : aac_dsp;
 
+ac->dsp.init_tables();
+
 return 0;
 }
 
diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index 5e18b30d99..30151604c2 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -311,6 +311,8 @@ static void AAC_RENAME(update_ltp)(AACDecContext *ac, 
SingleChannelElement *sce)
 }
 
 const AACDecDSP AAC_RENAME(aac_dsp) = {
+.init_tables = &AAC_RENAME(init_tables),
+
 .dequant_scalefactors = &AAC_RENAME(dequant_scalefactors),
 .apply_mid_side_stereo = &AAC_RENAME(apply_mid_side_stereo),
 .apply_intensity_stereo = &AAC_RENAME(apply_intensity_stereo),
diff --git a/libavcodec/aac/aacdec_fixed.c b/libavcodec/aac/aacdec_fixed.c
index 9b66f22d2f..9dd8f34f55 100644
--- a/libavcodec/aac/aacdec_fixed.c
+++ b/libavcodec/aac/aacdec_fixed.c
@@ -31,12 +31,34 @@
 
 #define USE_FIXED 1
 
+#include "libavutil/thread.h"
+
 #include "libavcodec/aac_defines.h"
 
 #include "libavcodec/aactab.h"
 #include "libavcodec/sinewin_fixed_tablegen.h"
+#include "libavcodec/kbdwin.h"
+
+DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME2(aac_kbd_long_1024))[1024];
+DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME2(aac_kbd_short_128))[128];
+DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_long_960))[960];
+DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_short_120))[120];
+
+static void init_tables_fixed_fn(void)
+{
+AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_long_1024), 4.0, 1024);
+AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_short_128), 6.0, 128);
+
+AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_long_960), 4.0, 960);
+AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_short_120), 6.0, 120);
+
+init_sine_windows_fixed();
+}
 
-DECLARE_ALIGNED(32, extern int, AAC_RENAME2(aac_kbd_long_1024))[1024];
-DECLARE_ALIGNED(32, extern int, AAC_RENAME2(aac_kbd_short_128))[128];
+static void init_tables_fixed(void)
+{
+static AVOnce init_fixed_once = AV_ONCE_INIT;
+ff_thread_once(&init_fixed_once, init_tables_fixed_fn);
+}
 
 #include "aacdec_dsp_template.c"
diff --git a/libavcodec/aac/aacdec_float.c b/libavcodec/aac/aacdec_float.c
index ba1b06cc21..6a5e8483b0 100644
--- a/libavcodec/aac/aacdec_float.c
+++ b/libavcodec/aac/aacdec_float.c
@@ -31,9 +31,36 @@
 
 #define USE_FIXED 0
 
+#include "libavutil/thread.h"
+
 #include "libavcodec/aac_defines.h"
 
 #include "libavcodec/aactab.h"
 #include "libavcodec/sinewin.h"
+#include "libavcodec/kbdwin.h"
+
+DECLARE_ALIGNED(32, static float, sine_120)[120];
+DECLARE_ALIGNED(32, static float, sine_960)[960];
+DECLARE_ALIGNED(32, static float, aac_kbd_long_960)[960];
+DECLARE_ALIGNED(32, static float, aac_kbd_short_120)[120];
+
+static void init_tables_float_fn(void)
+{
+AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_long_1024), 4.0, 1024);
+AAC_RENAME(ff_kbd_window_init)(AAC_RENAME2(aac_kbd_short_128), 6.0, 128);
+
+AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_long_960), 4.0, 960);
+AAC_RENAME(ff_kbd_window_init)(AAC_RENAME(aac_kbd_short_120), 6.0, 120);
+
+AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_960), 960);
+AAC_RENAME(ff_sine_window_init)(AAC_RENAME(sine_120), 120);
+AAC_RENAME(ff_init_ff_sine_windows)(9);
+}
+
+static void init_tables(void)
+{
+static AVOnce init_float_once = AV_ONCE_INIT;
+ff_thread_once(&init_float_once, init_tables_float_fn);
+}
 
 #include "aacdec_dsp_template.c"
diff --git a/libavcodec/aacdec.h b/libavcodec/aacdec.h
index 109c38d8e2..30da1fc198 100644
--- a/libavcodec/aacdec.h
+++ b/libavcodec/aacdec.h
@@ -204,6 +204,8 @@ typedef struct DynamicRangeControl {
  * DSP-specific primitives
  */
 typedef struct AACDecDSP {
+void (*init_tables)(void);
+
 void (*dequant_scalefactors)(SingleChannelElement *sce);
 
 void (*apply_mid_side_stereo)(AACDecContext *ac, ChannelElement *cpe);


[FFmpeg-cvslog] aacdec: template TNS application separately

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Mar 13 22:20:59 2024 
+0100| [db5128ef70002c7d3e04398bfc7a9897659437f1] | committer: Lynne

aacdec: template TNS application separately

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=db5128ef70002c7d3e04398bfc7a9897659437f1
---

 libavcodec/aac/aacdec_dsp_template.c | 65 
 libavcodec/aacdec.h  |  6 ---
 libavcodec/aacdec_template.c | 73 +++-
 libavcodec/mips/aacdec_mips.c|  2 +-
 4 files changed, 71 insertions(+), 75 deletions(-)

diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index 102091d331..da7f5fac4f 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -31,6 +31,7 @@
 
 #include "libavcodec/aacdec.h"
 #include "libavcodec/aac_defines.h"
+#include "libavcodec/lpc_functions.h"
 
 #include "libavcodec/aactab.h"
 
@@ -174,8 +175,72 @@ static void 
AAC_RENAME(apply_intensity_stereo)(AACDecContext *ac,
 }
 }
 
+/**
+ * Decode Temporal Noise Shaping filter coefficients and apply all-pole 
filters; reference: 4.6.9.3.
+ *
+ * @param   decode  1 if tool is used normally, 0 if tool is used in LTP.
+ * @param   coef    spectral coefficients
+ */
+static void AAC_RENAME(apply_tns)(void *_coef_param, TemporalNoiseShaping *tns,
+  IndividualChannelStream *ics, int decode)
+{
+const int mmm = FFMIN(ics->tns_max_bands, ics->max_sfb);
+int w, filt, m, i;
+int bottom, top, order, start, end, size, inc;
+INTFLOAT *coef_param = _coef_param;
+INTFLOAT lpc[TNS_MAX_ORDER];
+INTFLOAT tmp[TNS_MAX_ORDER+1];
+UINTFLOAT *coef = coef_param;
+
+if(!mmm)
+return;
+
+for (w = 0; w < ics->num_windows; w++) {
+bottom = ics->num_swb;
+for (filt = 0; filt < tns->n_filt[w]; filt++) {
+top= bottom;
+bottom = FFMAX(0, top - tns->length[w][filt]);
+order  = tns->order[w][filt];
+if (order == 0)
+continue;
+
+// tns_decode_coef
+compute_lpc_coefs(tns->AAC_RENAME(coef)[w][filt], order, lpc, 0, 
0, 0);
+
+start = ics->swb_offset[FFMIN(bottom, mmm)];
+end   = ics->swb_offset[FFMIN(   top, mmm)];
+if ((size = end - start) <= 0)
+continue;
+if (tns->direction[w][filt]) {
+inc = -1;
+start = end - 1;
+} else {
+inc = 1;
+}
+start += w * 128;
+
+if (decode) {
+// ar filter
+for (m = 0; m < size; m++, start += inc)
+for (i = 1; i <= FFMIN(m, order); i++)
+coef[start] -= AAC_MUL26((INTFLOAT)coef[start - i * 
inc], lpc[i - 1]);
+} else {
+// ma filter
+for (m = 0; m < size; m++, start += inc) {
+tmp[0] = coef[start];
+for (i = 1; i <= FFMIN(m, order); i++)
+coef[start] += AAC_MUL26(tmp[i], lpc[i - 1]);
+for (i = order; i > 0; i--)
+tmp[i] = tmp[i - 1];
+}
+}
+}
+}
+}
+
 const AACDecDSP AAC_RENAME(aac_dsp) = {
 .dequant_scalefactors = &AAC_RENAME(dequant_scalefactors),
 .apply_mid_side_stereo = &AAC_RENAME(apply_mid_side_stereo),
 .apply_intensity_stereo = &AAC_RENAME(apply_intensity_stereo),
+.apply_tns = &AAC_RENAME(apply_tns),
 };
diff --git a/libavcodec/aacdec.h b/libavcodec/aacdec.h
index 9078c2a4e6..3c8d14a53e 100644
--- a/libavcodec/aacdec.h
+++ b/libavcodec/aacdec.h
@@ -311,12 +311,6 @@ struct AACDecContext {
 /* aacdec functions pointers */
 void (*imdct_and_windowing)(struct AACDecContext *ac, SingleChannelElement 
*sce);
 void (*apply_ltp)(struct AACDecContext *ac, SingleChannelElement *sce);
-union {
-void (*apply_tns)(float coef[1024], TemporalNoiseShaping *tns,
-  IndividualChannelStream *ics, int decode);
-void (*apply_tns_fixed)(int coef[1024], TemporalNoiseShaping *tns,
-IndividualChannelStream *ics, int decode);
-};
 union {
 void (*windowing_and_mdct_ltp)(struct AACDecContext *ac, float *out,
float *in, IndividualChannelStream 
*ics);
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index da0bc47ac9..d0a5a6660f 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -2367,68 +2367,6 @@ static int decode_extension_payload(AACDecContext *ac, 
GetBitContext *gb, int cn
 return res;
 }
 
-/**
- * Decode Temporal Noise Shaping filter coefficients and apply all-pole 
filters; reference: 4.6.9.3.
- *
- * 

[FFmpeg-cvslog] aacdec: template mid/side stereo application separately

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Mar 13 21:59:35 2024 
+0100| [9f3fa77e0dac729ca3a0bf669c502cfa811c9ae7] | committer: Lynne

aacdec: template mid/side stereo application separately

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9f3fa77e0dac729ca3a0bf669c502cfa811c9ae7
---

 libavcodec/aac/aacdec_dsp_template.c | 35 +++
 libavcodec/aacdec_template.c | 36 +---
 2 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index 9a43cb71e0..3a43e1b04e 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -88,6 +88,41 @@ static void 
AAC_RENAME(dequant_scalefactors)(SingleChannelElement *sce)
 }
 }
 
+/**
+ * Mid/Side stereo decoding; reference: 4.6.8.1.3.
+ */
+static void AAC_RENAME(apply_mid_side_stereo)(AACDecContext *ac, 
ChannelElement *cpe)
+{
+const IndividualChannelStream *ics = &cpe->ch[0].ics;
+INTFLOAT *ch0 = cpe->ch[0].AAC_RENAME(coeffs);
+INTFLOAT *ch1 = cpe->ch[1].AAC_RENAME(coeffs);
+int g, i, group, idx = 0;
+const uint16_t *offsets = ics->swb_offset;
+for (g = 0; g < ics->num_window_groups; g++) {
+for (i = 0; i < ics->max_sfb; i++, idx++) {
+if (cpe->ms_mask[idx] &&
+cpe->ch[0].band_type[idx] < NOISE_BT &&
+cpe->ch[1].band_type[idx] < NOISE_BT) {
+#if USE_FIXED
+for (group = 0; group < ics->group_len[g]; group++) {
+ac->fdsp->butterflies_fixed(ch0 + group * 128 + offsets[i],
+ch1 + group * 128 + offsets[i],
+offsets[i+1] - offsets[i]);
+#else
+for (group = 0; group < ics->group_len[g]; group++) {
+ac->fdsp->butterflies_float(ch0 + group * 128 + offsets[i],
+   ch1 + group * 128 + offsets[i],
+   offsets[i+1] - offsets[i]);
+#endif /* USE_FIXED */
+}
+}
+}
+ch0 += ics->group_len[g] * 128;
+ch1 += ics->group_len[g] * 128;
+}
+}
+
 const AACDecDSP AAC_RENAME(aac_dsp) = {
 .dequant_scalefactors = &AAC_RENAME(dequant_scalefactors),
+.apply_mid_side_stereo = &AAC_RENAME(apply_mid_side_stereo),
 };
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index c2962a7ca2..4f7bc84395 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -2062,40 +2062,6 @@ fail:
 return ret;
 }
 
-/**
- * Mid/Side stereo decoding; reference: 4.6.8.1.3.
- */
-static void apply_mid_side_stereo(AACDecContext *ac, ChannelElement *cpe)
-{
-const IndividualChannelStream *ics = &cpe->ch[0].ics;
-INTFLOAT *ch0 = cpe->ch[0].AAC_RENAME(coeffs);
-INTFLOAT *ch1 = cpe->ch[1].AAC_RENAME(coeffs);
-int g, i, group, idx = 0;
-const uint16_t *offsets = ics->swb_offset;
-for (g = 0; g < ics->num_window_groups; g++) {
-for (i = 0; i < ics->max_sfb; i++, idx++) {
-if (cpe->ms_mask[idx] &&
-cpe->ch[0].band_type[idx] < NOISE_BT &&
-cpe->ch[1].band_type[idx] < NOISE_BT) {
-#if USE_FIXED
-for (group = 0; group < ics->group_len[g]; group++) {
-ac->fdsp->butterflies_fixed(ch0 + group * 128 + offsets[i],
-ch1 + group * 128 + offsets[i],
-offsets[i+1] - offsets[i]);
-#else
-for (group = 0; group < ics->group_len[g]; group++) {
-ac->fdsp->butterflies_float(ch0 + group * 128 + offsets[i],
-   ch1 + group * 128 + offsets[i],
-   offsets[i+1] - offsets[i]);
-#endif /* USE_FIXED */
-}
-}
-}
-ch0 += ics->group_len[g] * 128;
-ch1 += ics->group_len[g] * 128;
-}
-}
-
 /**
  * intensity stereo decoding; reference: 4.6.8.2.3
  *
@@ -2183,7 +2149,7 @@ static int decode_cpe(AACDecContext *ac, GetBitContext 
*gb, ChannelElement *cpe)
 
 if (common_window) {
 if (ms_present)
-apply_mid_side_stereo(ac, cpe);
+ac->dsp.apply_mid_side_stereo(ac, cpe);
 if (ac->oc[1].m4ac.object_type == AOT_AAC_MAIN) {
 apply_prediction(ac, &cpe->ch[0]);
 apply_prediction(ac, &cpe->ch[1]);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] aacdec: template intensity stereo application separately

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Mar 13 22:01:44 2024 
+0100| [ad16349f9bb802ddf0a09aa61b675a1dc5c70600] | committer: Lynne

aacdec: template intensity stereo application separately

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ad16349f9bb802ddf0a09aa61b675a1dc5c70600
---

 libavcodec/aac/aacdec_dsp_template.c | 53 +++
 libavcodec/aacdec_template.c | 54 +---
 2 files changed, 54 insertions(+), 53 deletions(-)

diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
index 3a43e1b04e..102091d331 100644
--- a/libavcodec/aac/aacdec_dsp_template.c
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -122,7 +122,60 @@ static void 
AAC_RENAME(apply_mid_side_stereo)(AACDecContext *ac, ChannelElement
 }
 }
 
+/**
+ * intensity stereo decoding; reference: 4.6.8.2.3
+ *
+ * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
+ *  [1] mask is decoded from bitstream; [2] mask is all 1s;
+ *  [3] reserved for scalable AAC
+ */
+static void AAC_RENAME(apply_intensity_stereo)(AACDecContext *ac,
+   ChannelElement *cpe, int 
ms_present)
+{
+const IndividualChannelStream *ics = &cpe->ch[1].ics;
+SingleChannelElement *sce1 = &cpe->ch[1];
+INTFLOAT *coef0 = cpe->ch[0].AAC_RENAME(coeffs), *coef1 = 
cpe->ch[1].AAC_RENAME(coeffs);
+const uint16_t *offsets = ics->swb_offset;
+int g, group, i, idx = 0;
+int c;
+INTFLOAT scale;
+for (g = 0; g < ics->num_window_groups; g++) {
+for (i = 0; i < ics->max_sfb;) {
+if (sce1->band_type[idx] == INTENSITY_BT ||
+sce1->band_type[idx] == INTENSITY_BT2) {
+const int bt_run_end = sce1->band_type_run_end[idx];
+for (; i < bt_run_end; i++, idx++) {
+c = -1 + 2 * (sce1->band_type[idx] - 14);
+if (ms_present)
+c *= 1 - 2 * cpe->ms_mask[idx];
+scale = c * sce1->AAC_RENAME(sf)[idx];
+for (group = 0; group < ics->group_len[g]; group++)
+#if USE_FIXED
+ac->subband_scale(coef1 + group * 128 + offsets[i],
+  coef0 + group * 128 + offsets[i],
+  scale,
+  23,
+  offsets[i + 1] - offsets[i] ,ac->avctx);
+#else
+ac->fdsp->vector_fmul_scalar(coef1 + group * 128 + 
offsets[i],
+coef0 + group * 128 + 
offsets[i],
+scale,
+offsets[i + 1] - 
offsets[i]);
+#endif /* USE_FIXED */
+}
+} else {
+int bt_run_end = sce1->band_type_run_end[idx];
+idx += bt_run_end - i;
+i= bt_run_end;
+}
+}
+coef0 += ics->group_len[g] * 128;
+coef1 += ics->group_len[g] * 128;
+}
+}
+
 const AACDecDSP AAC_RENAME(aac_dsp) = {
 .dequant_scalefactors = &AAC_RENAME(dequant_scalefactors),
 .apply_mid_side_stereo = &AAC_RENAME(apply_mid_side_stereo),
+.apply_intensity_stereo = &AAC_RENAME(apply_intensity_stereo),
 };
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 4f7bc84395..da0bc47ac9 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -2062,58 +2062,6 @@ fail:
 return ret;
 }
 
-/**
- * intensity stereo decoding; reference: 4.6.8.2.3
- *
- * @param   ms_present  Indicates mid/side stereo presence. [0] mask is all 0s;
- *  [1] mask is decoded from bitstream; [2] mask is all 1s;
- *  [3] reserved for scalable AAC
- */
-static void apply_intensity_stereo(AACDecContext *ac,
-   ChannelElement *cpe, int ms_present)
-{
-const IndividualChannelStream *ics = &cpe->ch[1].ics;
-SingleChannelElement *sce1 = &cpe->ch[1];
-INTFLOAT *coef0 = cpe->ch[0].AAC_RENAME(coeffs), *coef1 = 
cpe->ch[1].AAC_RENAME(coeffs);
-const uint16_t *offsets = ics->swb_offset;
-int g, group, i, idx = 0;
-int c;
-INTFLOAT scale;
-for (g = 0; g < ics->num_window_groups; g++) {
-for (i = 0; i < ics->max_sfb;) {
-if (sce1->band_type[idx] == INTENSITY_BT ||
-sce1->band_type[idx] == INTENSITY_BT2) {
-const int bt_run_end = sce1->band_type_run_end[idx];
-for (; i < bt_run_end; i++, idx++) {
-c = -1 + 2 * (sce1->band_type[idx] - 14);
-if (ms_present)
-c *= 1 - 2 * cpe->ms_mask[idx]

[FFmpeg-cvslog] aacdec: template scalefactor dequantization separately

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Mar 13 21:53:49 2024 
+0100| [a6295586f5040ce5ce838ff2ae11a5ba9b41d855] | committer: Lynne

aacdec: template scalefactor dequantization separately

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a6295586f5040ce5ce838ff2ae11a5ba9b41d855
---

 libavcodec/aac/Makefile  |  6 ++-
 libavcodec/aac/aacdec.c  |  5 ++
 libavcodec/aac/aacdec_dsp_template.c | 93 
 libavcodec/aac/aacdec_fixed.c| 34 +
 libavcodec/aac/aacdec_float.c| 34 +
 libavcodec/aacdec_template.c | 56 +-
 6 files changed, 171 insertions(+), 57 deletions(-)

diff --git a/libavcodec/aac/Makefile b/libavcodec/aac/Makefile
index 29e0ef0a2c..c3e525d373 100644
--- a/libavcodec/aac/Makefile
+++ b/libavcodec/aac/Makefile
@@ -1,5 +1,7 @@
 clean::
$(RM) $(CLEANSUFFIXES:%=libavcodec/aac/%)
 
-OBJS-$(CONFIG_AAC_DECODER)  +=  aac/aacdec.o aac/aacdec_tab.o
-OBJS-$(CONFIG_AAC_FIXED_DECODER)+=  aac/aacdec.o aac/aacdec_tab.o
+OBJS-$(CONFIG_AAC_DECODER)  +=  aac/aacdec.o aac/aacdec_tab.o \
+aac/aacdec_float.o
+OBJS-$(CONFIG_AAC_FIXED_DECODER)+=  aac/aacdec.o aac/aacdec_tab.o \
+aac/aacdec_fixed.o
diff --git a/libavcodec/aac/aacdec.c b/libavcodec/aac/aacdec.c
index 358fe598e5..00353bddc7 100644
--- a/libavcodec/aac/aacdec.c
+++ b/libavcodec/aac/aacdec.c
@@ -45,6 +45,9 @@
 #include "libavutil/tx.h"
 #include "libavutil/version.h"
 
+extern const AACDecDSP aac_dsp;
+extern const AACDecDSP aac_dsp_fixed;
+
 av_cold int ff_aac_decode_close(AVCodecContext *avctx)
 {
 AACDecContext *ac = avctx->priv_data;
@@ -115,6 +118,8 @@ av_cold int ff_aac_decode_init_common(AVCodecContext *avctx)
 if (ret < 0)
 return ret;
 
+ac->dsp = is_fixed ? aac_dsp_fixed : aac_dsp;
+
 return 0;
 }
 
diff --git a/libavcodec/aac/aacdec_dsp_template.c 
b/libavcodec/aac/aacdec_dsp_template.c
new file mode 100644
index 00..9a43cb71e0
--- /dev/null
+++ b/libavcodec/aac/aacdec_dsp_template.c
@@ -0,0 +1,93 @@
+/*
+ * AAC decoder
+ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
+ * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
+ * Copyright (c) 2008-2013 Alex Converse 
+ *
+ * AAC LATM decoder
+ * Copyright (c) 2008-2010 Paul Kendall 
+ * Copyright (c) 2010  Janne Grunau 
+ *
+ * AAC decoder fixed-point implementation
+ * Copyright (c) 2013
+ *  MIPS Technologies, Inc., California.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/aacdec.h"
+#include "libavcodec/aac_defines.h"
+
+#include "libavcodec/aactab.h"
+
+/**
+ * Convert integer scalefactors to the decoder's native expected
+ * scalefactor values.
+ */
+static void AAC_RENAME(dequant_scalefactors)(SingleChannelElement *sce)
+{
+IndividualChannelStream *ics = &sce->ics;
+const enum BandType *band_type = sce->band_type;
+const int *band_type_run_end = sce->band_type_run_end;
+const int *sfo = sce->sfo;
+INTFLOAT *sf = sce->AAC_RENAME(sf);
+
+int g, i, idx = 0;
+for (g = 0; g < ics->num_window_groups; g++) {
+for (i = 0; i < ics->max_sfb;) {
+int run_end = band_type_run_end[idx];
+switch (band_type[idx]) {
+case ZERO_BT:
+for (; i < run_end; i++, idx++)
+sf[idx] = FIXR(0.);
+break;
+case INTENSITY_BT: /* fallthrough */
+case INTENSITY_BT2:
+for (; i < run_end; i++, idx++) {
+#if USE_FIXED
+sf[idx] = 100 - sfo[idx];
+#else
+sf[idx] = ff_aac_pow2sf_tab[-sfo[idx] + POW_SF2_ZERO];
+#endif /* USE_FIXED */
+}
+break;
+case NOISE_BT:
+for (; i < run_end; i++, idx++) {
+#if USE_FIXED
+sf[idx] = -(100 + sfo[idx]);
+#else
+sf[idx] = -ff_aac_pow2sf_tab[sfo[idx] + POW_SF2_ZERO];
+#endif /* USE_FIXED */
+}
+break;
+

[FFmpeg-cvslog] aacdec: separate out scalefactor dequantization from decoding

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Mar 13 07:11:22 2024 
+0100| [60b60dd635a4e69e43ead19f10f90ac03aa83ee1] | committer: Lynne

aacdec: separate out scalefactor dequantization from decoding

Allows dequantization to be templated away.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=60b60dd635a4e69e43ead19f10f90ac03aa83ee1
---

 libavcodec/aacdec.h  |  1 +
 libavcodec/aacdec_template.c | 79 ++--
 2 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/libavcodec/aacdec.h b/libavcodec/aacdec.h
index 052ed59e01..9078c2a4e6 100644
--- a/libavcodec/aacdec.h
+++ b/libavcodec/aacdec.h
@@ -147,6 +147,7 @@ typedef struct SingleChannelElement {
 TemporalNoiseShaping tns;
 enum BandType band_type[128];   ///< band types
 int band_type_run_end[120]; ///< band type run end 
points
+int sfo[120];   ///< scalefactor offsets
 INTFLOAT_UNION(sf, [120]);  ///< scalefactors
 INTFLOAT_ALIGNED_UNION(32, coeffs,1024);///< coefficients for 
IMDCT, maybe processed
 INTFLOAT_ALIGNED_UNION(32, saved, 1536);///< overlap
diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 3adb4ebdf1..5f3be074f7 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1446,7 +1446,8 @@ static int decode_band_types(AACDecContext *ac, enum 
BandType band_type[120],
  *
  * @return  Returns error status. 0 - OK, !0 - error
  */
-static int decode_scalefactors(AACDecContext *ac, INTFLOAT sf[120], 
GetBitContext *gb,
+static int decode_scalefactors(AACDecContext *ac, int sfo[120],
+   GetBitContext *gb,
unsigned int global_gain,
IndividualChannelStream *ics,
enum BandType band_type[120],
@@ -1461,7 +1462,7 @@ static int decode_scalefactors(AACDecContext *ac, 
INTFLOAT sf[120], GetBitContex
 int run_end = band_type_run_end[idx];
 if (band_type[idx] == ZERO_BT) {
 for (; i < run_end; i++, idx++)
-sf[idx] = FIXR(0.);
+sfo[idx] = 0;
 } else if ((band_type[idx] == INTENSITY_BT) ||
(band_type[idx] == INTENSITY_BT2)) {
 for (; i < run_end; i++, idx++) {
@@ -1473,11 +1474,7 @@ static int decode_scalefactors(AACDecContext *ac, 
INTFLOAT sf[120], GetBitContex
   "Clipped intensity stereo 
position (%d -> %d)",
   offset[2], clipped_offset);
 }
-#if USE_FIXED
-sf[idx] = 100 - clipped_offset;
-#else
-sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + 
POW_SF2_ZERO];
-#endif /* USE_FIXED */
+sfo[idx] = clipped_offset;
 }
 } else if (band_type[idx] == NOISE_BT) {
 for (; i < run_end; i++, idx++) {
@@ -1492,11 +1489,7 @@ static int decode_scalefactors(AACDecContext *ac, 
INTFLOAT sf[120], GetBitContex
   "Clipped noise gain (%d -> %d)",
   offset[1], clipped_offset);
 }
-#if USE_FIXED
-sf[idx] = -(100 + clipped_offset);
-#else
-sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + 
POW_SF2_ZERO];
-#endif /* USE_FIXED */
+sfo[idx] = clipped_offset;
 }
 } else {
 for (; i < run_end; i++, idx++) {
@@ -1506,16 +1499,66 @@ static int decode_scalefactors(AACDecContext *ac, 
INTFLOAT sf[120], GetBitContex
"Scalefactor (%d) out of range.\n", offset[0]);
 return AVERROR_INVALIDDATA;
 }
+sfo[idx] = offset[0];
+}
+}
+}
+}
+return 0;
+}
+
+/**
+ * Convert integer scalefactors to the decoder's native expected
+ * scalefactor values.
+ */
+static void dequant_scalefactors(SingleChannelElement *sce)
+{
+IndividualChannelStream *ics = &sce->ics;
+const enum BandType *band_type = sce->band_type;
+const int *band_type_run_end = sce->band_type_run_end;
+const int *sfo = sce->sfo;
+INTFLOAT *sf = sce->AAC_RENAME(sf);
+
+int g, i, idx = 0;
+for (g = 0; g < ics->num_window_groups; g++) {
+for (i = 0; i < ics->max_sfb;) {
+int run_end = band_type_run_end[idx];
+switch (band_type[idx]) {
+case ZERO_BT:
+for (; i < run_end; i++, idx++)
+sf[idx] = FIXR(0.);
+break;
+case INTENSITY_BT: /* fallthrough */
+case INTENSITY

[FFmpeg-cvslog] aacdec: switch-ify scalefactor decoding

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Mar 13 07:16:57 2024 
+0100| [7f3b3e2df1d20d33aebd8c4f37eb4ac0e76cd58f] | committer: Lynne

aacdec: switch-ify scalefactor decoding

Brings it in line with dequantization.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7f3b3e2df1d20d33aebd8c4f37eb4ac0e76cd58f
---

 libavcodec/aacdec_template.c | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index a14b03263d..70a9c0c014 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1460,11 +1460,13 @@ static int decode_scalefactors(AACDecContext *ac, int 
sfo[120],
 for (g = 0; g < ics->num_window_groups; g++) {
 for (i = 0; i < ics->max_sfb;) {
 int run_end = band_type_run_end[idx];
-if (band_type[idx] == ZERO_BT) {
+switch (band_type[idx]) {
+case ZERO_BT:
 for (; i < run_end; i++, idx++)
 sfo[idx] = 0;
-} else if ((band_type[idx] == INTENSITY_BT) ||
-   (band_type[idx] == INTENSITY_BT2)) {
+break;
+case INTENSITY_BT: /* fallthrough */
+case INTENSITY_BT2:
 for (; i < run_end; i++, idx++) {
 offset[2] += get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - 
SCALE_DIFF_ZERO;
 clipped_offset = av_clip(offset[2], -155, 100);
@@ -1476,7 +1478,8 @@ static int decode_scalefactors(AACDecContext *ac, int 
sfo[120],
 }
 sfo[idx] = clipped_offset;
 }
-} else if (band_type[idx] == NOISE_BT) {
+break;
+case NOISE_BT:
 for (; i < run_end; i++, idx++) {
 if (noise_flag-- > 0)
 offset[1] += get_bits(gb, NOISE_PRE_BITS) - NOISE_PRE;
@@ -1491,7 +1494,8 @@ static int decode_scalefactors(AACDecContext *ac, int 
sfo[120],
 }
 sfo[idx] = clipped_offset;
 }
-} else {
+break;
+default:
 for (; i < run_end; i++, idx++) {
 offset[0] += get_vlc2(gb, ff_vlc_scalefactors, 7, 3) - 
SCALE_DIFF_ZERO;
 if (offset[0] > 255U) {
@@ -1501,6 +1505,7 @@ static int decode_scalefactors(AACDecContext *ac, int 
sfo[120],
 }
 sfo[idx] = offset[0];
 }
+break;
 }
 }
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] aacdec: give spectrum dequant+decode SCE rather than an ICS

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Mar 13 07:11:55 2024 
+0100| [77a88bbddaa9b1afa3c61ea4f3271b31b0aa87a2] | committer: Lynne

aacdec: give spectrum dequant+decode SCE rather than an ICS

Eliminates using templated values in function definition.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=77a88bbddaa9b1afa3c61ea4f3271b31b0aa87a2
---

 libavcodec/aacdec_template.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/libavcodec/aacdec_template.c b/libavcodec/aacdec_template.c
index 5f3be074f7..a14b03263d 100644
--- a/libavcodec/aacdec_template.c
+++ b/libavcodec/aacdec_template.c
@@ -1660,14 +1660,16 @@ static void decode_mid_side_stereo(ChannelElement *cpe, 
GetBitContext *gb,
  * @return  Returns error status. 0 - OK, !0 - error
  */
 static int decode_spectrum_and_dequant(AACDecContext *ac, INTFLOAT coef[1024],
-   GetBitContext *gb, const INTFLOAT 
sf[120],
+   GetBitContext *gb,
int pulse_present, const Pulse *pulse,
-   const IndividualChannelStream *ics,
-   enum BandType band_type[120])
+   SingleChannelElement *sce)
 {
 int i, k, g, idx = 0;
+IndividualChannelStream *ics = &sce->ics;
 const int c = 1024 / ics->num_windows;
 const uint16_t *offsets = ics->swb_offset;
+const INTFLOAT *sf = sce->AAC_RENAME(sf);
+const enum BandType *band_type = sce->band_type;
 INTFLOAT *coef_base = coef;
 
 for (g = 0; g < ics->num_windows; g++)
@@ -2094,8 +2096,9 @@ static int decode_ics(AACDecContext *ac, 
SingleChannelElement *sce,
 }
 }
 
-ret = decode_spectrum_and_dequant(ac, out, gb, sce->AAC_RENAME(sf), 
pulse_present,
-, ics, sce->band_type);
+ret = decode_spectrum_and_dequant(ac, out, gb,
+  pulse_present,
+  , sce);
 if (ret < 0)
 goto fail;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] aacdec: add a decoder DSP structure

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Mar 13 06:35:38 2024 
+0100| [ed009bfd3d2d8c37395373201afc58bb32cf1366] | committer: Lynne

aacdec: add a decoder DSP structure

To be used to abstract away DSP functions.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ed009bfd3d2d8c37395373201afc58bb32cf1366
---

 libavcodec/aacdec.h | 31 +--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/libavcodec/aacdec.h b/libavcodec/aacdec.h
index e55fea6e40..052ed59e01 100644
--- a/libavcodec/aacdec.h
+++ b/libavcodec/aacdec.h
@@ -41,6 +41,8 @@
 #include "aac.h"
 #include "mpeg4audio.h"
 
+typedef struct AACDecContext AACDecContext;
+
 /**
  * Output configuration status
  */
@@ -197,12 +199,37 @@ typedef struct DynamicRangeControl {
  */
 } DynamicRangeControl;
 
+/**
+ * DSP-specific primitives
+ */
+typedef struct AACDecDSP {
+void (*dequant_scalefactors)(SingleChannelElement *sce);
+
+void (*apply_mid_side_stereo)(AACDecContext *ac, ChannelElement *cpe);
+void (*apply_intensity_stereo)(AACDecContext *ac, ChannelElement *cpe,
+   int ms_present);
+
+void (*apply_tns)(void *_coef_param, TemporalNoiseShaping *tns,
+  IndividualChannelStream *ics, int decode);
+
+void (*apply_ltp)(AACDecContext *ac, SingleChannelElement *sce);
+void (*update_ltp)(AACDecContext *ac, SingleChannelElement *sce);
+
+void (*imdct_and_windowing)(AACDecContext *ac, SingleChannelElement *sce);
+void (*imdct_and_windowing_960)(AACDecContext *ac, SingleChannelElement 
*sce);
+void (*imdct_and_windowing_ld)(AACDecContext *ac, SingleChannelElement 
*sce);
+void (*imdct_and_windowing_eld)(AACDecContext *ac, SingleChannelElement 
*sce);
+} AACDecDSP;
+
 /**
  * main AAC decoding context
  */
-typedef struct AACDecContext {
+struct AACDecContext {
 const struct AVClass  *class;
 struct AVCodecContext *avctx;
+
+AACDecDSP dsp;
+
 struct AVFrame *frame;
 
 int is_saved; ///< Set if elements have stored overlap 
from previous frame.
@@ -298,7 +325,7 @@ typedef struct AACDecContext {
 void (*update_ltp)(struct AACDecContext *ac, SingleChannelElement *sce);
 void (*vector_pow43)(int *coefs, int len);
 void (*subband_scale)(int *dst, int *src, int scale, int offset, int len, 
void *log_context);
-} AACDecContext;
+};
 
 #if defined(USE_FIXED) && USE_FIXED
 #define fdsp  RENAME_FIXED(fdsp)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] aacdec: move aacdec_common to aac/aacdec_tab

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Thu Feb 29 04:52:58 2024 
+0100| [f55b587820847c4ce442a2dc2eda2b68bcbefd11] | committer: Lynne

aacdec: move aacdec_common to aac/aacdec_tab

Start to clean up the decoder.
Also renames a confusingly named file.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f55b587820847c4ce442a2dc2eda2b68bcbefd11
---

 libavcodec/Makefile  |  5 +++--
 libavcodec/aac/Makefile  |  5 +
 libavcodec/{aacdec_common.c => aac/aacdec_tab.c} | 11 ++-
 libavcodec/{aacdectab.h => aac/aacdec_tab.h} |  8 
 libavcodec/aacdec.c  |  2 +-
 libavcodec/aacdec_fixed.c|  2 +-
 libavcodec/aacsbr_template.c |  2 +-
 7 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index e1df848f53..f2da83c8eb 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -64,6 +64,7 @@ OBJS = ac3_parser.o   
  \
xiph.o   \
 
 # subsystems
+include $(SRC_PATH)/libavcodec/aac/Makefile
 include $(SRC_PATH)/libavcodec/vvc/Makefile
 -include $(SRC_PATH)/libavcodec/$(ARCH)/vvc/Makefile
 OBJS-$(CONFIG_AANDCTTABLES)+= aandcttab.o
@@ -179,11 +180,11 @@ OBJS-$(CONFIG_WMV2DSP) += wmv2dsp.o
 OBJS-$(CONFIG_ZERO12V_DECODER) += 012v.o
 OBJS-$(CONFIG_A64MULTI_ENCODER)+= a64multienc.o elbg.o
 OBJS-$(CONFIG_A64MULTI5_ENCODER)   += a64multienc.o elbg.o
-OBJS-$(CONFIG_AAC_DECODER) += aacdec.o aacdec_common.o aactab.o \
+OBJS-$(CONFIG_AAC_DECODER) += aacdec.o aactab.o \
   aacsbr.o aacps_common.o 
aacps_float.o \
   kbdwin.o \
   sbrdsp.o aacpsdsp_float.o cbrt_data.o
-OBJS-$(CONFIG_AAC_FIXED_DECODER)   += aacdec_fixed.o aacdec_common.o 
aactab.o \
+OBJS-$(CONFIG_AAC_FIXED_DECODER)   += aacdec_fixed.o aactab.o \
   aacsbr_fixed.o aacps_common.o 
aacps_fixed.o \
   kbdwin.o \
   sbrdsp_fixed.o aacpsdsp_fixed.o 
cbrt_data_fixed.o
diff --git a/libavcodec/aac/Makefile b/libavcodec/aac/Makefile
new file mode 100644
index 00..52facdf4cf
--- /dev/null
+++ b/libavcodec/aac/Makefile
@@ -0,0 +1,5 @@
+clean::
+   $(RM) $(CLEANSUFFIXES:%=libavcodec/aac/%)
+
+OBJS-$(CONFIG_AAC_DECODER)  +=  aac/aacdec_tab.o
+OBJS-$(CONFIG_AAC_FIXED_DECODER)+=  aac/aacdec_tab.o
diff --git a/libavcodec/aacdec_common.c b/libavcodec/aac/aacdec_tab.c
similarity index 99%
rename from libavcodec/aacdec_common.c
rename to libavcodec/aac/aacdec_tab.c
index 145c718047..45a84a9a72 100644
--- a/libavcodec/aacdec_common.c
+++ b/libavcodec/aac/aacdec_tab.c
@@ -25,11 +25,12 @@
  * Common code and tables of the AAC fixed- and floating-point decoders
  */
 
-#include "aac.h"
-#include "aacdectab.h"
-#include "aacps.h"
-#include "aactab.h"
-#include "vlc.h"
+#include "aacdec_tab.h"
+
+#include "libavcodec/aac.h"
+#include "libavcodec/aacps.h"
+#include "libavcodec/aactab.h"
+#include "libavcodec/vlc.h"
 
 #include "libavutil/attributes.h"
 #include "libavutil/thread.h"
diff --git a/libavcodec/aacdectab.h b/libavcodec/aac/aacdec_tab.h
similarity index 91%
rename from libavcodec/aacdectab.h
rename to libavcodec/aac/aacdec_tab.h
index 184508f2f3..70e49af202 100644
--- a/libavcodec/aacdectab.h
+++ b/libavcodec/aac/aacdec_tab.h
@@ -25,12 +25,12 @@
  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  */
 
-#ifndef AVCODEC_AACDECTAB_H
-#define AVCODEC_AACDECTAB_H
+#ifndef AVCODEC_AAC_AACDEC_TAB_H
+#define AVCODEC_AAC_AACDEC_TAB_H
 
 #include 
 
-#include "vlc.h"
+#include "libavcodec/vlc.h"
 
 #include "libavutil/attributes_internal.h"
 #include "libavutil/channel_layout.h"
@@ -52,4 +52,4 @@ extern const int16_t ff_aac_channel_map[3][4][6];
 extern const AVChannelLayout ff_aac_ch_layout[];
 FF_VISIBILITY_POP_HIDDEN
 
-#endif /* AVCODEC_AACDECTAB_H */
+#endif /* AVCODEC_AAC_AACDEC_TAB_H */
diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c
index b4870a6b1f..9c1b0cdc1f 100644
--- a/libavcodec/aacdec.c
+++ b/libavcodec/aacdec.c
@@ -47,7 +47,7 @@
 #include "aac.h"
 #include "aacdec.h"
 #include "aactab.h"
-#include "aacdectab.h"
+#include "aac/aacdec_tab.h"
 #include "adts_header.h"
 #include "cbrt_data.h"
 #include "sbr.h"
diff --git a/libavcodec/aacdec_fixed.c b/libavcodec/aacdec_fixed.c
index 305bb0ba9a..681e502e42 100644
--- a/libavcodec/aacdec_fixed.c
+++ b/libavcodec/aacdec_f

[FFmpeg-cvslog] aacsbr_template: include mem.h

2024-04-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Apr 10 18:44:48 2024 
+0200| [3600f757db3f908df0fcd5b5e1999aca22c41a5b] | committer: Lynne

aacsbr_template: include mem.h

Future AAC changes remove the need to include this header.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3600f757db3f908df0fcd5b5e1999aca22c41a5b
---

 libavcodec/aacsbr_template.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/aacsbr_template.c b/libavcodec/aacsbr_template.c
index eadd6fa2d3..f1dfa0da36 100644
--- a/libavcodec/aacsbr_template.c
+++ b/libavcodec/aacsbr_template.c
@@ -36,6 +36,7 @@
 #include "aac/aacdec_tab.h"
 #include "avcodec.h"
 #include "libavutil/qsort.h"
+#include "libavutil/mem.h"
 
 static av_cold void aacsbr_tableinit(void)
 {

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan_av1: add workaround for NVIDIA drivers tested on broken CTS

2024-04-16 Thread Lynne
ffmpeg | branch: release/7.0 | Lynne  | Sun Apr 14 14:11:44 2024 
+0200| [8dfafe536657e5c5437cf24f7cb058ef7a9f1875] | committer: Lynne

vulkan_av1: add workaround for NVIDIA drivers tested on broken CTS

The first release of the CTS for AV1 decoding had incorrect
offsets for the OrderHints values.
The CTS will be fixed, and eventually, the drivers will be
updated to the proper spec-conforming behaviour, but we still
need to add a workaround as this will take months.

Only NVIDIA uses these values at all, so limit the workaround
to only NVIDIA. Also, other vendors don't tend to provide accurate
CTS information.

(cherry picked from commit db09f1a5d811a3ca8adc89c58e29932efd0c255e)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8dfafe536657e5c5437cf24f7cb058ef7a9f1875
---

 libavcodec/vulkan_av1.c| 19 +++
 libavcodec/vulkan_decode.c |  9 +
 libavcodec/vulkan_decode.h |  4 
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c
index fcc9a4f03b..49cd69d051 100644
--- a/libavcodec/vulkan_av1.c
+++ b/libavcodec/vulkan_av1.c
@@ -97,9 +97,14 @@ static int vk_av1_fill_pict(AVCodecContext *avctx, const 
AV1Frame **ref_src,
 .RefFrameSignBias = hp->ref_frame_sign_bias_mask,
 };
 
-if (saved_order_hints)
-for (int i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++)
-vkav1_std_ref->SavedOrderHints[i] = saved_order_hints[i];
+if (saved_order_hints) {
+if (dec->quirk_av1_offset)
+for (int i = 1; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+vkav1_std_ref->SavedOrderHints[i - 1] = saved_order_hints[i];
+else
+for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+vkav1_std_ref->SavedOrderHints[i] = saved_order_hints[i];
+}
 
 *vkav1_ref = (VkVideoDecodeAV1DpbSlotInfoKHR) {
 .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR,
@@ -490,8 +495,14 @@ static int vk_av1_start_frame(AVCodecContext  
*avctx,
 }
 }
 
+if (dec->quirk_av1_offset)
+for (int i = 1; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+ap->std_pic_info.OrderHints[i - 1] = pic->order_hints[i];
+else
+for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+ap->std_pic_info.OrderHints[i] = pic->order_hints[i];
+
 for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) {
-ap->std_pic_info.OrderHints[i] = pic->order_hints[i];
 ap->loop_filter.loop_filter_ref_deltas[i] = 
frame_header->loop_filter_ref_deltas[i];
 ap->global_motion.GmType[i] = s->cur_frame.gm_type[i];
 for (int j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; j++) {
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 4561f26b62..5f6523920d 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -1114,6 +1114,7 @@ int ff_vk_decode_init(AVCodecContext *avctx)
 FFVulkanFunctions *vk;
 const VkVideoProfileInfoKHR *profile;
 const FFVulkanDecodeDescriptor *vk_desc;
+const VkPhysicalDeviceDriverProperties *driver_props;
 
 VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
 .sType = 
VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
@@ -1275,6 +1276,14 @@ int ff_vk_decode_init(AVCodecContext *avctx)
 return AVERROR_EXTERNAL;
 }
 
+driver_props = &dec->shared_ctx->s.driver_props;
+if (driver_props->driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY &&
+driver_props->conformanceVersion.major == 1 &&
+driver_props->conformanceVersion.minor == 3 &&
+driver_props->conformanceVersion.subminor == 8 &&
+driver_props->conformanceVersion.patch < 3)
+dec->quirk_av1_offset = 1;
+
 ff_vk_decode_flush(avctx);
 
 av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization sucessful\n");
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 7ba8b239cb..076af93499 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -72,6 +72,10 @@ typedef struct FFVulkanDecodeContext {
 int external_fg;   /* Oddity  #2 - hardware can't apply film grain */
 uint32_t frame_id_alloc_mask; /* For AV1 only */
 
+/* Workaround for NVIDIA drivers tested with CTS version 1.3.8 for AV1.
+ * The tests were incorrect as the OrderHints were offset by 1. */
+int quirk_av1_offset;
+
 /* Thread-local state below */
 struct HEVCHeaderSet *hevc_headers;
 size_t hevc_headers_size;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan_av1: add workaround for NVIDIA drivers tested on broken CTS

2024-04-14 Thread Lynne
ffmpeg | branch: master | Lynne  | Sun Apr 14 14:11:44 2024 
+0200| [db09f1a5d811a3ca8adc89c58e29932efd0c255e] | committer: Lynne

vulkan_av1: add workaround for NVIDIA drivers tested on broken CTS

The first release of the CTS for AV1 decoding had incorrect
offsets for the OrderHints values.
The CTS will be fixed, and eventually, the drivers will be
updated to the proper spec-conforming behaviour, but we still
need to add a workaround as this will take months.

Only NVIDIA uses these values at all, so limit the workaround
to only NVIDIA. Also, other vendors don't tend to provide accurate
CTS information.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=db09f1a5d811a3ca8adc89c58e29932efd0c255e
---

 libavcodec/vulkan_av1.c| 19 +++
 libavcodec/vulkan_decode.c |  9 +
 libavcodec/vulkan_decode.h |  4 
 3 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c
index fcc9a4f03b..49cd69d051 100644
--- a/libavcodec/vulkan_av1.c
+++ b/libavcodec/vulkan_av1.c
@@ -97,9 +97,14 @@ static int vk_av1_fill_pict(AVCodecContext *avctx, const 
AV1Frame **ref_src,
 .RefFrameSignBias = hp->ref_frame_sign_bias_mask,
 };
 
-if (saved_order_hints)
-for (int i = 0; i < AV1_TOTAL_REFS_PER_FRAME; i++)
-vkav1_std_ref->SavedOrderHints[i] = saved_order_hints[i];
+if (saved_order_hints) {
+if (dec->quirk_av1_offset)
+for (int i = 1; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+vkav1_std_ref->SavedOrderHints[i - 1] = saved_order_hints[i];
+else
+for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+vkav1_std_ref->SavedOrderHints[i] = saved_order_hints[i];
+}
 
 *vkav1_ref = (VkVideoDecodeAV1DpbSlotInfoKHR) {
 .sType = VK_STRUCTURE_TYPE_VIDEO_DECODE_AV1_DPB_SLOT_INFO_KHR,
@@ -490,8 +495,14 @@ static int vk_av1_start_frame(AVCodecContext  
*avctx,
 }
 }
 
+if (dec->quirk_av1_offset)
+for (int i = 1; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+ap->std_pic_info.OrderHints[i - 1] = pic->order_hints[i];
+else
+for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++)
+ap->std_pic_info.OrderHints[i] = pic->order_hints[i];
+
 for (int i = 0; i < STD_VIDEO_AV1_TOTAL_REFS_PER_FRAME; i++) {
-ap->std_pic_info.OrderHints[i] = pic->order_hints[i];
 ap->loop_filter.loop_filter_ref_deltas[i] = 
frame_header->loop_filter_ref_deltas[i];
 ap->global_motion.GmType[i] = s->cur_frame.gm_type[i];
 for (int j = 0; j < STD_VIDEO_AV1_GLOBAL_MOTION_PARAMS; j++) {
diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 9c6c2d4efb..d8c75cd0e6 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -1115,6 +1115,7 @@ int ff_vk_decode_init(AVCodecContext *avctx)
 FFVulkanFunctions *vk;
 const VkVideoProfileInfoKHR *profile;
 const FFVulkanDecodeDescriptor *vk_desc;
+const VkPhysicalDeviceDriverProperties *driver_props;
 
 VkVideoDecodeH264SessionParametersCreateInfoKHR h264_params = {
 .sType = 
VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_SESSION_PARAMETERS_CREATE_INFO_KHR,
@@ -1276,6 +1277,14 @@ int ff_vk_decode_init(AVCodecContext *avctx)
 return AVERROR_EXTERNAL;
 }
 
+driver_props = &dec->shared_ctx->s.driver_props;
+if (driver_props->driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY &&
+driver_props->conformanceVersion.major == 1 &&
+driver_props->conformanceVersion.minor == 3 &&
+driver_props->conformanceVersion.subminor == 8 &&
+driver_props->conformanceVersion.patch < 3)
+dec->quirk_av1_offset = 1;
+
 ff_vk_decode_flush(avctx);
 
 av_log(avctx, AV_LOG_VERBOSE, "Vulkan decoder initialization sucessful\n");
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 7ba8b239cb..076af93499 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -72,6 +72,10 @@ typedef struct FFVulkanDecodeContext {
 int external_fg;   /* Oddity  #2 - hardware can't apply film grain */
 uint32_t frame_id_alloc_mask; /* For AV1 only */
 
+/* Workaround for NVIDIA drivers tested with CTS version 1.3.8 for AV1.
+ * The tests were incorrect as the OrderHints were offset by 1. */
+int quirk_av1_offset;
+
 /* Thread-local state below */
 struct HEVCHeaderSet *hevc_headers;
 size_t hevc_headers_size;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavc/avfft: fix RDFT wrapper stride

2024-04-10 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Apr  6 07:30:07 2024 
+0200| [89a9042291e2f54be98e54e8e8fa50ee3fe7d1a6] | committer: Lynne

lavc/avfft: fix RDFT wrapper stride

Per the lavu/tx docs:

> * For forward transforms (R2C), stride must be the spacing between two
> * samples in bytes. For inverse transforms, the stride must be set
> * to the spacing between two complex values in bytes.

The code did the reverse.
The stride parameter is currently not respected for RDFT transforms,
but it has to be correct in case a future change starts relying on it.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=89a9042291e2f54be98e54e8e8fa50ee3fe7d1a6
---

 libavcodec/avfft.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 627fd7a0be..f6787937f6 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -158,7 +158,7 @@ RDFTContext *av_rdft_init(int nbits, enum RDFTransformType 
trans)
 return NULL;
 }
 
-s->stride = (trans == DFT_C2R) ? sizeof(float) : sizeof(AVComplexFloat);
+s->stride = (trans == DFT_C2R) ? sizeof(AVComplexFloat) : sizeof(float);
 s->len = 1 << nbits;
 s->inv = trans == IDFT_C2R;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan_av1: port to the new stable API

2024-03-25 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Jan 19 10:49:02 2024 
+1000| [ecdc94b97f809d5f2b88640842fd0541951ad295] | committer: Lynne

vulkan_av1: port to the new stable API

Co-Authored-by: Dave Airlie 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ecdc94b97f809d5f2b88640842fd0541951ad295
---

 configure  |   4 +-
 libavcodec/Makefile|   5 +-
 libavcodec/vulkan_av1.c| 514 -
 libavcodec/vulkan_decode.c |  31 +-
 libavcodec/vulkan_decode.h |   2 +-
 libavcodec/vulkan_video.h  |   2 -
 libavcodec/vulkan_video_codec_av1std_decode_mesa.h |  36 --
 libavcodec/vulkan_video_codec_av1std_mesa.h| 403 
 libavutil/hwcontext_vulkan.c   |   2 +-
 libavutil/vulkan_functions.h   |   2 +-
 libavutil/vulkan_loader.h  |   2 +-
 11 files changed, 306 insertions(+), 697 deletions(-)

diff --git a/configure b/configure
index e853deb51d..9fa639fca6 100755
--- a/configure
+++ b/configure
@@ -7300,8 +7300,8 @@ enabled vdpau &&
 check_lib vdpau_x11 "vdpau/vdpau.h vdpau/vdpau_x11.h" 
vdp_device_create_x11 -lvdpau -lX11
 
 if enabled vulkan; then
-check_pkg_config_header_only vulkan "vulkan >= 1.3.255" "vulkan/vulkan.h" 
"defined VK_VERSION_1_3" ||
-check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) 
|| (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 255)"
+check_pkg_config_header_only vulkan "vulkan >= 1.3.277" "vulkan/vulkan.h" 
"defined VK_VERSION_1_3" ||
+check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) 
|| (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 277)"
 fi
 
 if disabled vulkan; then
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 7ef2e03ca6..9ce6d445c1 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -1258,8 +1258,7 @@ SKIPHEADERS+= %_tablegen.h
  \
   aacenc_quantization.h \
   aacenc_quantization_misc.h\
   bitstream_template.h  \
-  vulkan_video_codec_av1std_mesa.h \
-  $(ARCH)/vpx_arith.h  \
+  $(ARCH)/vpx_arith.h   \
 
 SKIPHEADERS-$(CONFIG_AMF)  += amfenc.h
 SKIPHEADERS-$(CONFIG_D3D11VA)  += d3d11va.h dxva2_internal.h
@@ -1280,7 +1279,7 @@ SKIPHEADERS-$(CONFIG_QSVENC)   += qsvenc.h
 SKIPHEADERS-$(CONFIG_VAAPI)+= vaapi_decode.h vaapi_hevc.h 
vaapi_encode.h
 SKIPHEADERS-$(CONFIG_VDPAU)+= vdpau.h vdpau_internal.h
 SKIPHEADERS-$(CONFIG_VIDEOTOOLBOX) += videotoolbox.h vt_internal.h
-SKIPHEADERS-$(CONFIG_VULKAN)   += vulkan.h vulkan_video.h 
vulkan_decode.h vulkan_video_codec_av1std_decode_mesa.h
+SKIPHEADERS-$(CONFIG_VULKAN)   += vulkan.h vulkan_video.h 
vulkan_decode.h
 SKIPHEADERS-$(CONFIG_V4L2_M2M) += v4l2_buffers.h v4l2_context.h 
v4l2_m2m.h
 SKIPHEADERS-$(CONFIG_ZLIB) += zlib_wrapper.h
 
diff --git a/libavcodec/vulkan_av1.c b/libavcodec/vulkan_av1.c
index 5afd5353cc..c9e398eaec 100644
--- a/libavcodec/vulkan_av1.c
+++ b/libavcodec/vulkan_av1.c
@@ -26,7 +26,7 @@
 const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc = {
 .codec_id = AV_CODEC_ID_AV1,
 .decode_extension = FF_VK_EXT_VIDEO_DECODE_AV1,
-.decode_op= 0x0100, /* TODO fix this */
+.decode_op= VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR,
 .ext_props = {
 .extensionName = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_EXTENSION_NAME,
 .specVersion   = VK_STD_VULKAN_VIDEO_CODEC_AV1_DECODE_SPEC_VERSION,
@@ -36,33 +36,47 @@ const FFVulkanDecodeDescriptor ff_vk_dec_av1_desc = {
 typedef struct AV1VulkanDecodePicture {
 FFVulkanDecodePicture   vp;
 
-/* Workaround for a spec issue.
- *Can be removed once no longer needed, and threading can be enabled. */
+/* TODO: investigate if this can be removed to make decoding completely
+ * independent. */
 FFVulkanDecodeContext  *dec;
 
-StdVideoAV1MESATiletiles[MAX_TILES];
-StdVideoAV1MESATileListtile_list;
-const uint32_t*tile_offsets;
+uint32_t tile_sizes[MAX_TILES];
 
 /* Current picture */
-VkVideoDecodeAV1DpbSlotInfoMESAvkav1_ref;
-StdVideoAV1MESAFrameHeader av1_frame_header;
-VkVideoDecodeAV1PictureInfoMESAav1_pic_info;
+StdVideoDecodeAV1ReferenceInfo std_ref;
+VkVideoDecodeAV1DpbSlotInfoKHR vkav

[FFmpeg-cvslog] av1dec: add AV1_REF_FRAME_NONE

2024-03-25 Thread Lynne
ffmpeg | branch: master | Lynne  | Sun Mar 17 19:31:17 2024 
+0100| [998aa66a10546be207d5dfc1a4a76bc2ce9ea07c] | committer: Lynne

av1dec: add AV1_REF_FRAME_NONE

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=998aa66a10546be207d5dfc1a4a76bc2ce9ea07c
---

 libavcodec/av1.h |  1 +
 libavcodec/cbs_av1_syntax_template.c | 12 ++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/libavcodec/av1.h b/libavcodec/av1.h
index 8704bc41c1..94e88f8484 100644
--- a/libavcodec/av1.h
+++ b/libavcodec/av1.h
@@ -58,6 +58,7 @@ enum {
 
 // Reference frames (section 6.10.24).
 enum {
+AV1_REF_FRAME_NONE= -1,
 AV1_REF_FRAME_INTRA   = 0,
 AV1_REF_FRAME_LAST= 1,
 AV1_REF_FRAME_LAST2   = 2,
diff --git a/libavcodec/cbs_av1_syntax_template.c 
b/libavcodec/cbs_av1_syntax_template.c
index 2979c5d98f..3f4b13a177 100644
--- a/libavcodec/cbs_av1_syntax_template.c
+++ b/libavcodec/cbs_av1_syntax_template.c
@@ -360,7 +360,7 @@ static int FUNC(set_frame_refs)(CodedBitstreamContext *ctx, 
RWContext *rw,
 int i, j;
 
 for (i = 0; i < AV1_REFS_PER_FRAME; i++)
-ref_frame_idx[i] = -1;
+ref_frame_idx[i] = AV1_REF_FRAME_NONE;
 ref_frame_idx[AV1_REF_FRAME_LAST - AV1_REF_FRAME_LAST] = 
current->last_frame_idx;
 ref_frame_idx[AV1_REF_FRAME_GOLDEN - AV1_REF_FRAME_LAST] = 
current->golden_frame_idx;
 
@@ -378,7 +378,7 @@ static int FUNC(set_frame_refs)(CodedBitstreamContext *ctx, 
RWContext *rw,
 latest_order_hint = shifted_order_hints[current->last_frame_idx];
 earliest_order_hint = shifted_order_hints[current->golden_frame_idx];
 
-ref = -1;
+ref = AV1_REF_FRAME_NONE;
 for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
 int hint = shifted_order_hints[i];
 if (!used_frame[i] && hint >= cur_frame_hint &&
@@ -392,7 +392,7 @@ static int FUNC(set_frame_refs)(CodedBitstreamContext *ctx, 
RWContext *rw,
 used_frame[ref] = 1;
 }
 
-ref = -1;
+ref = AV1_REF_FRAME_NONE;
 for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
 int hint = shifted_order_hints[i];
 if (!used_frame[i] && hint >= cur_frame_hint &&
@@ -406,7 +406,7 @@ static int FUNC(set_frame_refs)(CodedBitstreamContext *ctx, 
RWContext *rw,
 used_frame[ref] = 1;
 }
 
-ref = -1;
+ref = AV1_REF_FRAME_NONE;
 for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
 int hint = shifted_order_hints[i];
 if (!used_frame[i] && hint >= cur_frame_hint &&
@@ -423,7 +423,7 @@ static int FUNC(set_frame_refs)(CodedBitstreamContext *ctx, 
RWContext *rw,
 for (i = 0; i < AV1_REFS_PER_FRAME - 2; i++) {
 int ref_frame = ref_frame_list[i];
 if (ref_frame_idx[ref_frame - AV1_REF_FRAME_LAST] < 0 ) {
-ref = -1;
+ref = AV1_REF_FRAME_NONE;
 for (j = 0; j < AV1_NUM_REF_FRAMES; j++) {
 int hint = shifted_order_hints[j];
 if (!used_frame[j] && hint < cur_frame_hint &&
@@ -439,7 +439,7 @@ static int FUNC(set_frame_refs)(CodedBitstreamContext *ctx, 
RWContext *rw,
 }
 }
 
-ref = -1;
+ref = AV1_REF_FRAME_NONE;
 for (i = 0; i < AV1_NUM_REF_FRAMES; i++) {
 int hint = shifted_order_hints[i];
 if (ref < 0 || hint < earliest_order_hint) {

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] configure: fix compilation with glslang 14

2024-02-18 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Jan 31 17:28:56 2024 
+0100| [e43615fc2ab27d562ed7e087803f4a364a7d1175] | committer: Lynne

configure: fix compilation with glslang 14

The configure check already had a fallback for the previous version
of glslang, which had different requirements for flags.
This commit simply moves the flags needed for glslang 13 to the
fallback, while first trying to use new flags for glslang 14.

This drops support for ~3 year old glslang versions, which
I'm not sure had the complete C API we're using anyway.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e43615fc2ab27d562ed7e087803f4a364a7d1175
---

 configure | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index d66694e83e..23066efa32 100755
--- a/configure
+++ b/configure
@@ -6781,11 +6781,11 @@ enabled libfreetype   && require_pkg_config 
libfreetype freetype2 "ft2build.
 enabled libfribidi&& require_pkg_config libfribidi fribidi fribidi.h 
fribidi_version_info
 enabled libharfbuzz   && require_pkg_config libharfbuzz harfbuzz hb.h 
hb_buffer_create
 enabled libglslang && { check_lib spirv_compiler 
glslang/Include/glslang_c_interface.h glslang_initialize_process \
--lglslang -lMachineIndependent -lOSDependent 
-lHLSL -lOGLCompiler -lGenericCodeGen \
+-lglslang -lMachineIndependent -lGenericCodeGen \
 -lSPVRemapper -lSPIRV -lSPIRV-Tools-opt 
-lSPIRV-Tools -lpthread -lstdc++ -lm ||
 require spirv_compiler 
glslang/Include/glslang_c_interface.h glslang_initialize_process \
--lglslang -lOSDependent -lHLSL -lOGLCompiler \
--lSPVRemapper -lSPIRV -lSPIRV-Tools-opt 
-lSPIRV-Tools -lpthread -lstdc++ -lm; }
+-lglslang -lMachineIndependent -lOSDependent 
-lHLSL -lOGLCompiler -lGenericCodeGen \
+-lSPVRemapper -lSPIRV -lSPIRV-Tools-opt 
-lSPIRV-Tools -lpthread -lstdc++ -lm ; }
 enabled libgme&& { check_pkg_config libgme libgme gme/gme.h 
gme_new_emu ||
require libgme gme/gme.h gme_new_emu -lgme 
-lstdc++; }
 enabled libgsm&& { for gsm_hdr in "gsm.h" "gsm/gsm.h"; do

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavu/tx: correctly use a default scale parameter for all transform types

2024-02-18 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Feb 17 20:50:48 2024 
+0100| [c7ceff690f16f04f3fc2e48ac73f48cebc445587] | committer: Lynne

lavu/tx: correctly use a default scale parameter for all transform types

This fixes the previous commit and adds more cases (DCT-I and DST-I).

I am holding off on defining a scale parameter for FFTs as I'd like
to use a complex value for them.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c7ceff690f16f04f3fc2e48ac73f48cebc445587
---

 libavutil/tx.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/libavutil/tx.c b/libavutil/tx.c
index cc360cff31..f991618b4b 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -914,10 +914,12 @@ av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, 
enum AVTXType type,
 if (!(flags & AV_TX_INPLACE))
 flags |= FF_TX_OUT_OF_PLACE;
 
-if (!scale && ((type == AV_TX_FLOAT_MDCT) || (type == AV_TX_INT32_MDCT) || 
(type == AV_TX_FLOAT_RDFT) || (AV_TX_INT32_RDFT)))
-scale = _scale_f;
-else if (!scale && ((type == AV_TX_DOUBLE_MDCT) || (type == 
AV_TX_DOUBLE_RDFT)))
+if (!scale && ((type == AV_TX_DOUBLE_MDCT) || (type == AV_TX_DOUBLE_DCT) ||
+   (type == AV_TX_DOUBLE_DCT_I) || (type == 
AV_TX_DOUBLE_DST_I) ||
+   (type == AV_TX_DOUBLE_RDFT)))
 scale = _scale_d;
+else if (!scale && !TYPE_IS(FFT, type))
+scale = _scale_f;
 
 ret = ff_tx_init_subtx(, type, flags, NULL, len, inv, scale);
 if (ret < 0)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avfft: avoid overreads with RDFT API users

2024-02-09 Thread Lynne
ffmpeg | branch: release/6.1 | Lynne  | Fri Feb  9 18:17:54 2024 
+0100| [8815d775322570db8ecf82124467a818c681ed90] | committer: Lynne

avfft: avoid overreads with RDFT API users

The new API requires an extra array member at the very end,
which old API users did not allocate.

This disables in-place RDFT transforms and instead does the transform
out of place by copying once. There shouldn't be a significant loss of
speed, as our in-place FFT requires a reorder, which in the majority of
cases is likely more expensive than the copy.

(cherry picked from commit 90adef99cab46ed1791c8096ac2ac0b89f67a266)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8815d775322570db8ecf82124467a818c681ed90
---

 libavcodec/avfft.c | 31 +--
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 999b5ed79a..627fd7a0be 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -152,7 +152,7 @@ RDFTContext *av_rdft_init(int nbits, enum RDFTransformType 
trans)
 return NULL;
 
 ret = av_tx_init(>ctx, >fn, AV_TX_FLOAT_RDFT, trans == IDFT_C2R,
- 1 << nbits, , AV_TX_INPLACE);
+ 1 << nbits, , 0x0);
 if (ret < 0) {
 av_free(s);
 return NULL;
@@ -162,17 +162,35 @@ RDFTContext *av_rdft_init(int nbits, enum 
RDFTransformType trans)
 s->len = 1 << nbits;
 s->inv = trans == IDFT_C2R;
 
+s->tmp = av_malloc((s->len + 2)*sizeof(float));
+if (!s->tmp) {
+av_tx_uninit(>ctx);
+av_free(s);
+return NULL;
+}
+
 return (RDFTContext *)s;
 }
 
 void av_rdft_calc(RDFTContext *s, FFTSample *data)
 {
 AVTXWrapper *w = (AVTXWrapper *)s;
-if (w->inv)
-FFSWAP(float, data[1], data[w->len]);
-w->fn(w->ctx, data, (void *)data, w->stride);
-if (!w->inv)
-FFSWAP(float, data[1], data[w->len]);
+float *src = w->inv ? w->tmp : (float *)data;
+float *dst = w->inv ? (float *)data : w->tmp;
+
+if (w->inv) {
+memcpy(src, data, w->len*sizeof(float));
+
+src[w->len] = src[1];
+src[1] = 0.0f;
+}
+
+w->fn(w->ctx, dst, (void *)src, w->stride);
+
+if (!w->inv) {
+dst[1] = dst[w->len];
+memcpy(data, dst, w->len*sizeof(float));
+}
 }
 
 av_cold void av_rdft_end(RDFTContext *s)
@@ -180,6 +198,7 @@ av_cold void av_rdft_end(RDFTContext *s)
 if (s) {
 AVTXWrapper *w = (AVTXWrapper *)s;
 av_tx_uninit(>ctx);
+av_free(w->tmp);
 av_free(w);
 }
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avfft: avoid overreads with RDFT API users

2024-02-09 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Feb  9 18:17:54 2024 
+0100| [90adef99cab46ed1791c8096ac2ac0b89f67a266] | committer: Lynne

avfft: avoid overreads with RDFT API users

The new API requires an extra array member at the very end,
which old API users did not allocate.

This disables in-place RDFT transforms and instead does the transform
out of place by copying once. There shouldn't be a significant loss of
speed, as our in-place FFT requires a reorder, which in the majority of
cases is likely more expensive than the copy.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=90adef99cab46ed1791c8096ac2ac0b89f67a266
---

 libavcodec/avfft.c | 31 +--
 1 file changed, 25 insertions(+), 6 deletions(-)

diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 999b5ed79a..627fd7a0be 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -152,7 +152,7 @@ RDFTContext *av_rdft_init(int nbits, enum RDFTransformType 
trans)
 return NULL;
 
 ret = av_tx_init(>ctx, >fn, AV_TX_FLOAT_RDFT, trans == IDFT_C2R,
- 1 << nbits, , AV_TX_INPLACE);
+ 1 << nbits, , 0x0);
 if (ret < 0) {
 av_free(s);
 return NULL;
@@ -162,17 +162,35 @@ RDFTContext *av_rdft_init(int nbits, enum 
RDFTransformType trans)
 s->len = 1 << nbits;
 s->inv = trans == IDFT_C2R;
 
+s->tmp = av_malloc((s->len + 2)*sizeof(float));
+if (!s->tmp) {
+av_tx_uninit(>ctx);
+av_free(s);
+return NULL;
+}
+
 return (RDFTContext *)s;
 }
 
 void av_rdft_calc(RDFTContext *s, FFTSample *data)
 {
 AVTXWrapper *w = (AVTXWrapper *)s;
-if (w->inv)
-FFSWAP(float, data[1], data[w->len]);
-w->fn(w->ctx, data, (void *)data, w->stride);
-if (!w->inv)
-FFSWAP(float, data[1], data[w->len]);
+float *src = w->inv ? w->tmp : (float *)data;
+float *dst = w->inv ? (float *)data : w->tmp;
+
+if (w->inv) {
+memcpy(src, data, w->len*sizeof(float));
+
+src[w->len] = src[1];
+src[1] = 0.0f;
+}
+
+w->fn(w->ctx, dst, (void *)src, w->stride);
+
+if (!w->inv) {
+dst[1] = dst[w->len];
+memcpy(data, dst, w->len*sizeof(float));
+}
 }
 
 av_cold void av_rdft_end(RDFTContext *s)
@@ -180,6 +198,7 @@ av_cold void av_rdft_end(RDFTContext *s)
 if (s) {
 AVTXWrapper *w = (AVTXWrapper *)s;
 av_tx_uninit(>ctx);
+av_free(w->tmp);
 av_free(w);
 }
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] x86/tx_init: properly indicate the extended available transform sizes

2024-02-09 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Feb  9 15:21:37 2024 
+0100| [9af87828bd787e09724b86d233ead75d6589ae79] | committer: Lynne

x86/tx_init: properly indicate the extended available transform sizes

Forgot to do this with the previous commit.

This actually makes the assembly get used.

Still the fastest FFT in the world, 15% faster than FFTW on the
largest available size.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9af87828bd787e09724b86d233ead75d6589ae79
---

 libavutil/x86/tx_float_init.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/libavutil/x86/tx_float_init.c b/libavutil/x86/tx_float_init.c
index d3c0beb50f..84ec1122f6 100644
--- a/libavutil/x86/tx_float_init.c
+++ b/libavutil/x86/tx_float_init.c
@@ -270,15 +270,15 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] 
= {
AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, 
AV_CPU_FLAG_AVXSLOW),
 TX_DEF(fft32_ns, FFT, 32, 32, 2, 0, 352, b8_i2, fma3, FMA3, AV_TX_INPLACE 
| FF_TX_PRESHUFFLE,
AV_CPU_FLAG_AVXSLOW),
-TX_DEF(fft_sr,FFT, 64, 131072, 2, 0, 256, b8_i2, avx, AVX,  0, 
AV_CPU_FLAG_AVXSLOW),
-TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX,
+TX_DEF(fft_sr,FFT, 64, 2097152, 2, 0, 256, b8_i2, avx, AVX,  0, 
AV_CPU_FLAG_AVXSLOW),
+TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX,
AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, 
AV_CPU_FLAG_AVXSLOW),
-TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 320, b8_i2, avx, AVX,  
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 320, b8_i2, avx, AVX,  
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
AV_CPU_FLAG_AVXSLOW),
-TX_DEF(fft_sr,FFT, 64, 131072, 2, 0, 288, b8_i2, fma3,  FMA3,  0, 
AV_CPU_FLAG_AVXSLOW),
-TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3,  FMA3,
+TX_DEF(fft_sr,FFT, 64, 2097152, 2, 0, 288, b8_i2, fma3,  FMA3,  0, 
AV_CPU_FLAG_AVXSLOW),
+TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3,  FMA3,
AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, 
AV_CPU_FLAG_AVXSLOW),
-TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 352, b8_i2, fma3,  FMA3,  
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 352, b8_i2, fma3,  FMA3,  
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
AV_CPU_FLAG_AVXSLOW),
 
 #if HAVE_AVX2_EXTERNAL
@@ -287,11 +287,11 @@ const FFTXCodelet * const ff_tx_codelet_list_float_x86[] 
= {
 TX_DEF(fft15_ns, FFT, 15, 15, 15, 0, 384, factor_init, avx2, AVX2,
AV_TX_INPLACE | FF_TX_PRESHUFFLE, AV_CPU_FLAG_AVXSLOW),
 
-TX_DEF(fft_sr,FFT, 64, 131072, 2, 0, 320, b8_i2, avx2, AVX2, 0,
+TX_DEF(fft_sr,FFT, 64, 2097152, 2, 0, 320, b8_i2, avx2, AVX2, 0,
AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
-TX_DEF(fft_sr_asm, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2,
+TX_DEF(fft_sr_asm, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2,
AV_TX_INPLACE | FF_TX_PRESHUFFLE | FF_TX_ASM_CALL, 
AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
-TX_DEF(fft_sr_ns, FFT, 64, 131072, 2, 0, 384, b8_i2, avx2, AVX2, 
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
+TX_DEF(fft_sr_ns, FFT, 64, 2097152, 2, 0, 384, b8_i2, avx2, AVX2, 
AV_TX_INPLACE | FF_TX_PRESHUFFLE,
AV_CPU_FLAG_AVXSLOW | AV_CPU_FLAG_SLOW_GATHER),
 
 TX_DEF(fft_pfa_15xM, FFT, 60, TX_LEN_UNLIMITED, 15, 2, 320, fft_pfa_init, 
avx2, AVX2,

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] x86/tx_float: enable SIMD for sizes over 131072

2024-02-07 Thread Lynne
ffmpeg | branch: master | Lynne  | Thu Jan 18 17:30:29 2024 
+0100| [bd3e71b21ec3786ec6fc00ef260af0150f31b71b] | committer: Lynne

x86/tx_float: enable SIMD for sizes over 131072

The tables for the new sizes were added last year due
to being required for SDR.
However, the assembly was never updated to use them.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bd3e71b21ec3786ec6fc00ef260af0150f31b71b
---

 libavutil/x86/tx_float.asm | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavutil/x86/tx_float.asm b/libavutil/x86/tx_float.asm
index e1533a8595..42006848f1 100644
--- a/libavutil/x86/tx_float.asm
+++ b/libavutil/x86/tx_float.asm
@@ -46,7 +46,7 @@
 %endif
 
 %assign i 16
-%rep 14
+%rep 18
 cextern tab_ %+ i %+ _float ; ff_tab_i_float...
 %assign i (i << 1)
 %endrep
@@ -1385,7 +1385,11 @@ FFT_SPLIT_RADIX_DEF 8192,  .16384pt
 FFT_SPLIT_RADIX_DEF 16384, .32768pt
 FFT_SPLIT_RADIX_DEF 32768, .65536pt
 FFT_SPLIT_RADIX_DEF 65536, .131072pt
-FFT_SPLIT_RADIX_DEF 131072
+FFT_SPLIT_RADIX_DEF 131072, .262144pt
+FFT_SPLIT_RADIX_DEF 262144, .524288pt
+FFT_SPLIT_RADIX_DEF 524288, .1048576pt
+FFT_SPLIT_RADIX_DEF 1048576, .2097152pt
+FFT_SPLIT_RADIX_DEF 2097152
 
 
;===
 ; Final synthesis + deinterleaving code

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavfi/vsrc_testsrc_vulkan: fix -Wint-conversion

2024-01-31 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Jan 31 14:15:04 2024 
+0100| [5860a966d2fffbbda1af0014f0a4d37a21c4f2ca] | committer: Lynne

lavfi/vsrc_testsrc_vulkan: fix -Wint-conversion

While VK_NULL_HANDLE is equivalent to NULL on 64-bit platforms, the same is not
true across all platforms.

Fixes building with gcc-14.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5860a966d2fffbbda1af0014f0a4d37a21c4f2ca
---

 libavfilter/vsrc_testsrc_vulkan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vsrc_testsrc_vulkan.c 
b/libavfilter/vsrc_testsrc_vulkan.c
index 8761c21dfd..1720bfac5e 100644
--- a/libavfilter/vsrc_testsrc_vulkan.c
+++ b/libavfilter/vsrc_testsrc_vulkan.c
@@ -231,7 +231,7 @@ static int testsrc_vulkan_activate(AVFilterContext *ctx)
 return AVERROR(ENOMEM);
 
 err = ff_vk_filter_process_simple(>vkctx, >e, >pl, 
s->picref, NULL,
-  NULL, >opts, sizeof(s->opts));
+  VK_NULL_HANDLE, >opts, 
sizeof(s->opts));
 if (err < 0)
 return err;
 }
@@ -250,7 +250,7 @@ static int testsrc_vulkan_activate(AVFilterContext *ctx)
 frame->sample_aspect_ratio = s->sar;
 if (!s->draw_once) {
 err = ff_vk_filter_process_simple(>vkctx, >e, >pl, frame, 
NULL,
-  NULL, >opts, sizeof(s->opts));
+  VK_NULL_HANDLE, >opts, 
sizeof(s->opts));
 if (err < 0) {
 av_frame_free();
 return err;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] configure: update copyright year

2023-12-31 Thread Lynne
ffmpeg | branch: master | Lynne  | Mon Jan  1 00:00:00 2024 
+| [b95ee2ec5f84054de8bf6db9fe1b1119d569f269] | committer: Lynne

configure: update copyright year

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b95ee2ec5f84054de8bf6db9fe1b1119d569f269
---

 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index cd66e42850..65b4659b0a 100755
--- a/configure
+++ b/configure
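@@ -8045,7 +8045,7 @@ cat > $TMPH <<EOF
[hunk body lost in archive extraction]

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog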

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavc/Makefile: build vulkan decode code if vulkan_av1 has been enabled

2023-12-03 Thread Lynne
ffmpeg | branch: release/6.1 | Lynne  | Sun Dec  3 21:02:13 2023 
+0100| [2c87aa0b231954d32909c0df48cb27ff89fd4506] | committer: Lynne

lavc/Makefile: build vulkan decode code if vulkan_av1 has been enabled

Forgotten.

Reviewed-by: Neal Gompa 
Tested-by: Neal Gompa 
(cherry picked from commit 8c117b75afa3c6b824fab85ec6716dbe3ba975be)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2c87aa0b231954d32909c0df48cb27ff89fd4506
---

 libavcodec/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 580a8d6b54..ec57e53e30 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -993,7 +993,7 @@ OBJS-$(CONFIG_AV1_DXVA2_HWACCEL)  += dxva2_av1.o
 OBJS-$(CONFIG_AV1_NVDEC_HWACCEL)  += nvdec_av1.o
 OBJS-$(CONFIG_AV1_VAAPI_HWACCEL)  += vaapi_av1.o
 OBJS-$(CONFIG_AV1_VDPAU_HWACCEL)  += vdpau_av1.o
-OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_av1.o
+OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_decode.o vulkan_av1.o
 OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o
 OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
 OBJS-$(CONFIG_H264_D3D11VA_HWACCEL)   += dxva2_h264.o

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavc/Makefile: build vulkan decode code if vulkan_av1 has been enabled

2023-12-03 Thread Lynne
ffmpeg | branch: master | Lynne  | Sun Dec  3 21:02:13 2023 
+0100| [8c117b75afa3c6b824fab85ec6716dbe3ba975be] | committer: Lynne

lavc/Makefile: build vulkan decode code if vulkan_av1 has been enabled

Forgotten.

Reviewed-by: Neal Gompa 
Tested-by: Neal Gompa 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8c117b75afa3c6b824fab85ec6716dbe3ba975be
---

 libavcodec/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 748806e702..fd9883d2ca 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -998,7 +998,7 @@ OBJS-$(CONFIG_AV1_DXVA2_HWACCEL)  += dxva2_av1.o
 OBJS-$(CONFIG_AV1_NVDEC_HWACCEL)  += nvdec_av1.o
 OBJS-$(CONFIG_AV1_VAAPI_HWACCEL)  += vaapi_av1.o
 OBJS-$(CONFIG_AV1_VDPAU_HWACCEL)  += vdpau_av1.o
-OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_av1.o
+OBJS-$(CONFIG_AV1_VULKAN_HWACCEL) += vulkan_decode.o vulkan_av1.o
 OBJS-$(CONFIG_H263_VAAPI_HWACCEL) += vaapi_mpeg4.o
 OBJS-$(CONFIG_H263_VIDEOTOOLBOX_HWACCEL)  += videotoolbox.o
 OBJS-$(CONFIG_H264_D3D11VA_HWACCEL)   += dxva2_h264.o

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] nlmeans_vulkan: fix offsets calculation and various stride issues

2023-11-09 Thread Lynne
ffmpeg | branch: release/6.1 | Lynne  | Tue Nov  7 07:27:30 2023 
+| [86c4d04051e00998327fece4c3e3e4edcfb64482] | committer: Lynne

nlmeans_vulkan: fix offsets calculation and various stride issues

We calculated offsets as pairs, but addressed them in the shader
as single float values, while reading them as ivec2s.

Also removes unused code (it was provisionally added in case cooperative
matrices could be used, but that turned out to be impossible).

(cherry picked from commit 99fcdee5e80db8a2a8ff1ea9b66a9b74d8f96f67)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=86c4d04051e00998327fece4c3e3e4edcfb64482
---

 libavfilter/vf_nlmeans_vulkan.c | 78 -
 1 file changed, 31 insertions(+), 47 deletions(-)

diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c
index 2b8f97d7d9..fac38d16f4 100644
--- a/libavfilter/vf_nlmeans_vulkan.c
+++ b/libavfilter/vf_nlmeans_vulkan.c
@@ -94,7 +94,7 @@ static void insert_horizontal_pass(FFVkSPIRVShader *shd, int 
nb_rows, int first,
 GLSLC(2, #pragma unroll(1) 
   );
 GLSLF(2, for (r = 0; r < %i; r++) {
   ,nb_rows);
 GLSLC(3, prefix_sum = DTYPE(0);
   );
-GLSLC(3, offset = uint64_t(int_stride)*(pos.y + r)*T_ALIGN;
   );
+GLSLC(3, offset = int_stride * uint64_t(pos.y + r);
   );
 GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset);   
   );
 GLSLC(0,   
   );
 GLSLF(3, for (pos.x = 0; pos.x < width[%i]; pos.x++) { 
   ,plane);
@@ -122,7 +122,7 @@ static void insert_vertical_pass(FFVkSPIRVShader *shd, int 
nb_rows, int first, i
 GLSLC(0,   
   );
 GLSLF(1, if (pos.x < width[%i]) {  
   ,plane);
 GLSLF(2, for (pos.y = 0; pos.y < height[%i]; pos.y++) {
   ,plane);
-GLSLC(3, offset = uint64_t(int_stride)*pos.y*T_ALIGN;  
   );
+GLSLC(3, offset = int_stride * uint64_t(pos.y);
   );
 GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset);   
   );
 GLSLC(0,   
   );
 GLSLC(3, #pragma unroll(1) 
   );
@@ -167,40 +167,26 @@ static void insert_weights_pass(FFVkSPIRVShader *shd, int 
nb_rows, int vert,
 GLSLC(0,  
);
 GLSLC(3, lt = ((pos.x - p) < 0) || ((pos.y - p) < 0); 
);
 GLSLC(0,  
);
-if (TYPE_ELEMS == 4) {
-GLSLF(3, src[0] = texture(input_img[%i], pos + offs[0])[%i];   
,plane, comp);
-GLSLF(3, src[1] = texture(input_img[%i], pos + offs[1])[%i];   
,plane, comp);
-GLSLF(3, src[2] = texture(input_img[%i], pos + offs[2])[%i];   
,plane, comp);
-GLSLF(3, src[3] = texture(input_img[%i], pos + offs[3])[%i];   
,plane, comp);
-} else {
-for (int i = 0; i < 16; i++)
-GLSLF(3, src[%i][%i] = texture(input_img[%i], pos + offs[%i])[%i];
-  ,i / 4, i % 4, plane, i, comp);
-
-}
+GLSLF(3, src[0] = texture(input_img[%i], pos + offs[0])[%i];  
,plane, comp);
+GLSLF(3, src[1] = texture(input_img[%i], pos + offs[1])[%i];  
,plane, comp);
+GLSLF(3, src[2] = texture(input_img[%i], pos + offs[2])[%i];  
,plane, comp);
+GLSLF(3, src[3] = texture(input_img[%i], pos + offs[3])[%i];  
,plane, comp);
 GLSLC(0,  
);
 GLSLC(3, if (lt == false) {   
);
-GLSLC(4, a = integral_data.v[(pos.y - p)*int_stride + pos.x - 
p]; );
-GLSLC(4, c = integral_data.v[(pos.y - p)*int_stride + pos.x + 
p]; );
-GLSLC(4, b = integral_data.v[(pos.y + p)*int_stride + pos.x - 
p]; );
-GLSLC(4, d = integral_data.v[(pos.y + p)*int_stride + pos.x + 
p]; );
+GLSLC(3, offset = int_stride * uint64_t(pos.y - p);   
);
+GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset);  
);
+GLSLC(4, a = dst.v[pos.x - p];
);
+GLSLC(4, c = dst.v[pos.x + p];
);
+GLSLC(3, offset = int_stride * uint64_t(pos.y + p);   
);
+GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset);  
);
+GLSLC(4,

[FFmpeg-cvslog] nlmeans_vulkan: fix offsets calculation and various stride issues

2023-11-09 Thread Lynne
ffmpeg | branch: master | Lynne  | Tue Nov  7 07:27:30 2023 
+| [99fcdee5e80db8a2a8ff1ea9b66a9b74d8f96f67] | committer: Lynne

nlmeans_vulkan: fix offsets calculation and various stride issues

We calculated offsets as pairs, but addressed them in the shader
as single float values, while reading them as ivec2s.

Also removes unused code (was provisionally added if cooperative matrices
could be used, but that turned out to be impossible).

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=99fcdee5e80db8a2a8ff1ea9b66a9b74d8f96f67
---

 libavfilter/vf_nlmeans_vulkan.c | 78 -
 1 file changed, 31 insertions(+), 47 deletions(-)

diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c
index 2b8f97d7d9..fac38d16f4 100644
--- a/libavfilter/vf_nlmeans_vulkan.c
+++ b/libavfilter/vf_nlmeans_vulkan.c
@@ -94,7 +94,7 @@ static void insert_horizontal_pass(FFVkSPIRVShader *shd, int 
nb_rows, int first,
 GLSLC(2, #pragma unroll(1) 
   );
 GLSLF(2, for (r = 0; r < %i; r++) {
   ,nb_rows);
 GLSLC(3, prefix_sum = DTYPE(0);
   );
-GLSLC(3, offset = uint64_t(int_stride)*(pos.y + r)*T_ALIGN;
   );
+GLSLC(3, offset = int_stride * uint64_t(pos.y + r);
   );
 GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset);   
   );
 GLSLC(0,   
   );
 GLSLF(3, for (pos.x = 0; pos.x < width[%i]; pos.x++) { 
   ,plane);
@@ -122,7 +122,7 @@ static void insert_vertical_pass(FFVkSPIRVShader *shd, int 
nb_rows, int first, i
 GLSLC(0,   
   );
 GLSLF(1, if (pos.x < width[%i]) {  
   ,plane);
 GLSLF(2, for (pos.y = 0; pos.y < height[%i]; pos.y++) {
   ,plane);
-GLSLC(3, offset = uint64_t(int_stride)*pos.y*T_ALIGN;  
   );
+GLSLC(3, offset = int_stride * uint64_t(pos.y);
   );
 GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset);   
   );
 GLSLC(0,   
   );
 GLSLC(3, #pragma unroll(1) 
   );
@@ -167,40 +167,26 @@ static void insert_weights_pass(FFVkSPIRVShader *shd, int 
nb_rows, int vert,
 GLSLC(0,  
);
 GLSLC(3, lt = ((pos.x - p) < 0) || ((pos.y - p) < 0); 
);
 GLSLC(0,  
);
-if (TYPE_ELEMS == 4) {
-GLSLF(3, src[0] = texture(input_img[%i], pos + offs[0])[%i];   
,plane, comp);
-GLSLF(3, src[1] = texture(input_img[%i], pos + offs[1])[%i];   
,plane, comp);
-GLSLF(3, src[2] = texture(input_img[%i], pos + offs[2])[%i];   
,plane, comp);
-GLSLF(3, src[3] = texture(input_img[%i], pos + offs[3])[%i];   
,plane, comp);
-} else {
-for (int i = 0; i < 16; i++)
-GLSLF(3, src[%i][%i] = texture(input_img[%i], pos + offs[%i])[%i];
-  ,i / 4, i % 4, plane, i, comp);
-
-}
+GLSLF(3, src[0] = texture(input_img[%i], pos + offs[0])[%i];  
,plane, comp);
+GLSLF(3, src[1] = texture(input_img[%i], pos + offs[1])[%i];  
,plane, comp);
+GLSLF(3, src[2] = texture(input_img[%i], pos + offs[2])[%i];  
,plane, comp);
+GLSLF(3, src[3] = texture(input_img[%i], pos + offs[3])[%i];  
,plane, comp);
 GLSLC(0,  
);
 GLSLC(3, if (lt == false) {   
);
-GLSLC(4, a = integral_data.v[(pos.y - p)*int_stride + pos.x - 
p]; );
-GLSLC(4, c = integral_data.v[(pos.y - p)*int_stride + pos.x + 
p]; );
-GLSLC(4, b = integral_data.v[(pos.y + p)*int_stride + pos.x - 
p]; );
-GLSLC(4, d = integral_data.v[(pos.y + p)*int_stride + pos.x + 
p]; );
+GLSLC(3, offset = int_stride * uint64_t(pos.y - p);   
);
+GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset);  
);
+GLSLC(4, a = dst.v[pos.x - p];
);
+GLSLC(4, c = dst.v[pos.x + p];
);
+GLSLC(3, offset = int_stride * uint64_t(pos.y + p);   
);
+GLSLC(3, dst = DataBuffer(uint64_t(integral_data) + offset);  
);
+GLSLC(4, b = dst.v[pos.x - p];
);
+GLSLC(4,

[FFmpeg-cvslog] bwdif_vulkan: fix artifacts on vulkan decode images

2023-10-31 Thread Lynne
ffmpeg | branch: release/6.1 | Lynne  | Sun Oct 29 07:19:25 2023 
+0100| [4e5f3e6b8e1132354eed810dfdadf87f45c5de27] | committer: Lynne

bwdif_vulkan: fix artifacts on vulkan decode images

Due to making the decode frames context use the coded size, the
filter started to display those artifacts as it reused the input frame's size.

Change it to instead output the real image size for images, not the input.

(cherry picked from commit 0e8abf26983aa0dc72cbfbb094eeed13a9b55404)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4e5f3e6b8e1132354eed810dfdadf87f45c5de27
---

 libavfilter/vf_bwdif_vulkan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vf_bwdif_vulkan.c b/libavfilter/vf_bwdif_vulkan.c
index f1623e6ef7..690a89c4ba 100644
--- a/libavfilter/vf_bwdif_vulkan.c
+++ b/libavfilter/vf_bwdif_vulkan.c
@@ -325,8 +325,8 @@ static int bwdif_vulkan_config_input(AVFilterLink *inlink)
 
 /* Defaults */
 vkctx->output_format = input_frames->sw_format;
-vkctx->output_width  = input_frames->width;
-vkctx->output_height = input_frames->height;
+vkctx->output_width  = inlink->w;
+vkctx->output_height = inlink->h;
 
 return 0;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] bwdif_vulkan: fix artifacts on vulkan decode images

2023-10-31 Thread Lynne
ffmpeg | branch: master | Lynne  | Sun Oct 29 07:19:25 2023 
+0100| [0e8abf26983aa0dc72cbfbb094eeed13a9b55404] | committer: Lynne

bwdif_vulkan: fix artifacts on vulkan decode images

Due to making the decode frames context use the coded size, the
filter started to display those artifacts as it reused the input frame's size.

Change it to instead output the real image size for images, not the input.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0e8abf26983aa0dc72cbfbb094eeed13a9b55404
---

 libavfilter/vf_bwdif_vulkan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vf_bwdif_vulkan.c b/libavfilter/vf_bwdif_vulkan.c
index f1623e6ef7..690a89c4ba 100644
--- a/libavfilter/vf_bwdif_vulkan.c
+++ b/libavfilter/vf_bwdif_vulkan.c
@@ -325,8 +325,8 @@ static int bwdif_vulkan_config_input(AVFilterLink *inlink)
 
 /* Defaults */
 vkctx->output_format = input_frames->sw_format;
-vkctx->output_width  = input_frames->width;
-vkctx->output_height = input_frames->height;
+vkctx->output_width  = inlink->w;
+vkctx->output_height = inlink->h;
 
 return 0;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] configure: update copyright year

2023-10-29 Thread Lynne
ffmpeg | branch: release/5.1 | Lynne  | Sun Jan  1 00:00:00 2023 
+0100| [dad04e27b000f649ed4afd00252b07d1d5b49b7e] | committer: Michael 
Niedermayer

configure: update copyright year

(cherry picked from commit 62da0b4a741a064f118a0eece496d6bcc437ec91)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=dad04e27b000f649ed4afd00252b07d1d5b49b7e
---

 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index ba5793b2ff..6629783f34 100755
--- a/configure
+++ b/configure
@@ -7783,7 +7783,7 @@ cat > $TMPH <https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan: return VK_NOT_READY when no queries are available

2023-10-28 Thread Lynne
ffmpeg | branch: release/6.1 | Lynne  | Wed Oct 25 22:58:20 2023 
+| [1a8e76698478006d97432f1eb972d37ef3549dbc] | committer: Lynne

vulkan: return VK_NOT_READY when no queries are available

Fixes a validation issue.
The issue is that the function gets called before we've submitted a frame
for decoding to that context. However, we cannot run queries before
they've been reset, which happens at submission time.
As we'd need to otherwise run a command queue at init-time, just check
if submissions have happened.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1a8e76698478006d97432f1eb972d37ef3549dbc
---

 libavutil/vulkan.c | 5 +
 libavutil/vulkan.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index dec8ccad64..bf8456b06d 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -456,6 +456,9 @@ VkResult ff_vk_exec_get_query(FFVulkanContext *s, 
FFVkExecContext *e,
 int64_t res = 0;
 VkQueryResultFlags qf = 0;
 
+if (!e->had_submission)
+return VK_NOT_READY;
+
 qf |= pool->query_64bit ?
   VK_QUERY_RESULT_64_BIT : 0x0;
 qf |= pool->query_statuses ?
@@ -779,6 +782,8 @@ int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext 
*e)
 }
 }
 
+e->had_submission = 1;
+
 return 0;
 }
 
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 25c5ad4b74..b666841836 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -154,6 +154,7 @@ typedef struct FFVkExecContext {
 uint32_t idx;
 const struct FFVkExecPool *parent;
 pthread_mutex_t lock;
+int had_submission;
 
 /* Queue for the execution context */
 VkQueue queue;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan: return VK_NOT_READY when no queries are available

2023-10-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Oct 25 22:58:20 2023 
+| [1a8e76698478006d97432f1eb972d37ef3549dbc] | committer: Lynne

vulkan: return VK_NOT_READY when no queries are available

Fixes a validation issue.
The issue is that the function gets called before we've submitted a frame
for decoding to that context. However, we cannot run queries before
they've been reset, which happens at submission time.
As we'd need to otherwise run a command queue at init-time, just check
if submissions have happened.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1a8e76698478006d97432f1eb972d37ef3549dbc
---

 libavutil/vulkan.c | 5 +
 libavutil/vulkan.h | 1 +
 2 files changed, 6 insertions(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index dec8ccad64..bf8456b06d 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -456,6 +456,9 @@ VkResult ff_vk_exec_get_query(FFVulkanContext *s, 
FFVkExecContext *e,
 int64_t res = 0;
 VkQueryResultFlags qf = 0;
 
+if (!e->had_submission)
+return VK_NOT_READY;
+
 qf |= pool->query_64bit ?
   VK_QUERY_RESULT_64_BIT : 0x0;
 qf |= pool->query_statuses ?
@@ -779,6 +782,8 @@ int ff_vk_exec_submit(FFVulkanContext *s, FFVkExecContext 
*e)
 }
 }
 
+e->had_submission = 1;
+
 return 0;
 }
 
diff --git a/libavutil/vulkan.h b/libavutil/vulkan.h
index 25c5ad4b74..b666841836 100644
--- a/libavutil/vulkan.h
+++ b/libavutil/vulkan.h
@@ -154,6 +154,7 @@ typedef struct FFVkExecContext {
 uint32_t idx;
 const struct FFVkExecPool *parent;
 pthread_mutex_t lock;
+int had_submission;
 
 /* Queue for the execution context */
 VkQueue queue;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan_decode: correct flipped condition in image layout

2023-10-25 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Oct 25 21:56:03 2023 
+0200| [70864e6adba636daf4551ba9e65a19eeb93bced1] | committer: Lynne

vulkan_decode: correct flipped condition in image layout

Changed by the previous commit.
Caused validation issues on hardware with !reuse_dpb_dst but not layered_dpb.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=70864e6adba636daf4551ba9e65a19eeb93bced1
---

 libavcodec/vulkan_decode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 7f575d1283..a89d84fcaa 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -449,7 +449,7 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
 .srcAccessMask = VK_ACCESS_2_NONE,
 .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR,
 .oldLayout = vkf->layout[0],
-.newLayout = (dec->layered_dpb && vp->dpb_frame) ?
+.newLayout = (dec->layered_dpb || vp->dpb_frame) ?
  VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR :
  VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, /* Spec, 07252 
utter madness */
 .srcQueueFamilyIndex = vkf->queue_family[0],

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan_decode: fix pedantic validation issue

2023-10-25 Thread Lynne
ffmpeg | branch: master | Lynne  | Tue Oct 24 22:43:06 2023 
+0200| [467e4118393170ff11b7725ec4565350fd1da195] | committer: Lynne

vulkan_decode: fix pedantic validation issue

"Validation Error: [ VUID-VkImageViewCreateInfo-imageViewType-04974 ] Object 0: 
handle = 0x9f9b413c, type = VK_OBJECT_TYPE_IMAGE; | MessageID = 
0xc120e150 | vkCreateImageView():
Using pCreateInfo->viewType VK_IMAGE_VIEW_TYPE_2D and the 
subresourceRange.layerCount VK_REMAINING_ARRAY_LAYERS=(17) and must 1 (try 
looking into VK_IMAGE_VIEW_TYPE_*_ARRAY).
The Vulkan spec states: If viewType is VK_IMAGE_VIEW_TYPE_1D, 
VK_IMAGE_VIEW_TYPE_2D, or VK_IMAGE_VIEW_TYPE_3D; and 
subresourceRange.layerCount is VK_REMAINING_ARRAY_LAYERS,
then the remaining number of layers must be 1"

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=467e4118393170ff11b7725ec4565350fd1da195
---

 libavcodec/vulkan_decode.c | 23 +--
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 3b9576c0a9..c01eeb9cdc 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -113,11 +113,12 @@ int ff_vk_params_invalidate(AVCodecContext *avctx, int t, 
const uint8_t *b, uint
 return 0;
 }
 
-static int vk_decode_create_view(FFVulkanDecodeShared *ctx, VkImageView 
*dst_view,
+static int vk_decode_create_view(FFVulkanDecodeContext *dec, VkImageView 
*dst_view,
  VkImageAspectFlags *aspect, AVVkFrame *src,
- VkFormat vkf)
+ VkFormat vkf, int is_current)
 {
 VkResult ret;
+FFVulkanDecodeShared *ctx = dec->shared_ctx;
 FFVulkanFunctions *vk = >s.vkfn;
 VkImageAspectFlags aspect_mask = ff_vk_aspect_bits_from_vkfmt(vkf);
 
@@ -128,7 +129,8 @@ static int vk_decode_create_view(FFVulkanDecodeShared *ctx, 
VkImageView *dst_vie
 VkImageViewCreateInfo img_view_create_info = {
 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 .pNext = _sampler_info,
-.viewType = VK_IMAGE_VIEW_TYPE_2D,
+.viewType = dec->layered_dpb && !is_current ?
+VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D,
 .format = vkf,
 .image = src->img[0],
 .components = (VkComponentMapping) {
@@ -140,7 +142,8 @@ static int vk_decode_create_view(FFVulkanDecodeShared *ctx, 
VkImageView *dst_vie
 .subresourceRange = (VkImageSubresourceRange) {
 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
 .baseArrayLayer = 0,
-.layerCount = VK_REMAINING_ARRAY_LAYERS,
+.layerCount = dec->layered_dpb && !is_current ?
+  VK_REMAINING_ARRAY_LAYERS : 1,
 .levelCount = 1,
 },
 };
@@ -203,10 +206,10 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext 
*dec, AVFrame *pic,
 if (!vkpic->dpb_frame)
 return AVERROR(ENOMEM);
 
-err = vk_decode_create_view(ctx, >img_view_ref,
+err = vk_decode_create_view(dec, >img_view_ref,
 >img_aspect_ref,
 (AVVkFrame *)vkpic->dpb_frame->data[0],
-dpb_hwfc->format[0]);
+dpb_hwfc->format[0], is_current);
 if (err < 0)
 return err;
 
@@ -217,10 +220,10 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext 
*dec, AVFrame *pic,
 AVHWFramesContext *frames = (AVHWFramesContext 
*)pic->hw_frames_ctx->data;
 AVVulkanFramesContext *hwfc = frames->hwctx;
 
-err = vk_decode_create_view(ctx, >img_view_out,
+err = vk_decode_create_view(dec, >img_view_out,
 >img_aspect,
 (AVVkFrame *)pic->data[0],
-hwfc->format[0]);
+hwfc->format[0], is_current);
 if (err < 0)
 return err;
 
@@ -1249,9 +1252,9 @@ int ff_vk_decode_init(AVCodecContext *avctx)
 goto fail;
 }
 
-err = vk_decode_create_view(ctx, >layered_view, 
>layered_aspect,
+err = vk_decode_create_view(dec, >layered_view, 
>layered_aspect,
 (AVVkFrame 
*)ctx->layered_frame->data[0],
-s->hwfc->format[0]);
+s->hwfc->format[0], 0);
 if (err < 0)
 goto fail;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan_decode: fix another validation issue

2023-10-25 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Oct 25 01:32:20 2023 
+0200| [0b3616231d330ff25b28a20795394777a3f91b6d] | committer: Lynne

vulkan_decode: fix another validation issue

Surprising no one, the insane usage rule has a catch.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0b3616231d330ff25b28a20795394777a3f91b6d
---

 libavcodec/vulkan_decode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index c01eeb9cdc..7f575d1283 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -449,7 +449,8 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
 .srcAccessMask = VK_ACCESS_2_NONE,
 .dstAccessMask = VK_ACCESS_2_VIDEO_DECODE_WRITE_BIT_KHR,
 .oldLayout = vkf->layout[0],
-.newLayout = vp->dpb_frame ? VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR :
+.newLayout = (dec->layered_dpb && vp->dpb_frame) ?
+ VK_IMAGE_LAYOUT_VIDEO_DECODE_DST_KHR :
  VK_IMAGE_LAYOUT_VIDEO_DECODE_DPB_KHR, /* Spec, 07252 
utter madness */
 .srcQueueFamilyIndex = vkf->queue_family[0],
 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan_decode: use coded_width/height instead of the non-coded width and height

2023-10-25 Thread Lynne
ffmpeg | branch: master | Lynne  | Tue Oct 24 06:33:07 2023 
+0200| [9ee4f47c94083b4fe38d4e217a7d65055d3ad53f] | committer: Lynne

vulkan_decode: use coded_width/height instead of the non-coded width and height

Partially fixes 
https://streams.videolan.org/issues/19938/2_20180305-15.04.59.ts
The stream is coded as 1920x1080, meant to be rendered at 1440x1080 with cropping,
or 1680x1080 before cropping. Currently, the created DPB is 1440x1080, which 
results
in the image being decoded incorrectly, as the decoder overwrites output memory.
This commit fixes this.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9ee4f47c94083b4fe38d4e217a7d65055d3ad53f
---

 libavcodec/vulkan_decode.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index e6a0646139..3b9576c0a9 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -872,10 +872,10 @@ static int vulkan_decode_get_profile(AVCodecContext 
*avctx, AVBufferRef *frames_
" separate_references" : "");
 
 /* Check if decoding is possible with the given parameters */
-if (avctx->width  < caps->minCodedExtent.width   ||
-avctx->height < caps->minCodedExtent.height  ||
-avctx->width  > caps->maxCodedExtent.width   ||
-avctx->height > caps->maxCodedExtent.height)
+if (avctx->coded_width  < caps->minCodedExtent.width   ||
+avctx->coded_height < caps->minCodedExtent.height  ||
+avctx->coded_width  > caps->maxCodedExtent.width   ||
+avctx->coded_height > caps->maxCodedExtent.height)
 return AVERROR(EINVAL);
 
 if (!(avctx->hwaccel_flags & AV_HWACCEL_FLAG_IGNORE_LEVEL) &&
@@ -1027,8 +1027,8 @@ int ff_vk_frame_params(AVCodecContext *avctx, AVBufferRef 
*hw_frames_ctx)
 frames_ctx->user_opaque = prof;
 frames_ctx->free= free_profile_data;
 
-frames_ctx->width  = avctx->width;
-frames_ctx->height = avctx->height;
+frames_ctx->width  = avctx->coded_width;
+frames_ctx->height = avctx->coded_height;
 frames_ctx->format = AV_PIX_FMT_VULKAN;
 
 hwfc->format[0]= vkfmt;
@@ -1224,8 +1224,8 @@ int ff_vk_decode_init(AVCodecContext *avctx)
 dpb_frames = (AVHWFramesContext *)ctx->dpb_hwfc_ref->data;
 dpb_frames->format= s->frames->format;
 dpb_frames->sw_format = s->frames->sw_format;
-dpb_frames->width = s->frames->width;
-dpb_frames->height= s->frames->height;
+dpb_frames->width = avctx->coded_width;
+dpb_frames->height= avctx->coded_height;
 
 dpb_hwfc = dpb_frames->hwctx;
 dpb_hwfc->create_pnext = (void 
*)ff_vk_find_struct(ctx->s.hwfc->create_pnext,

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] hwcontext_vulkan: improve queue family init code

2023-10-23 Thread Lynne
ffmpeg | branch: master | Lynne  | Sun Oct 22 03:52:33 2023 
+0200| [c258623c0a635d98e7e21123215446ebd2201b1e] | committer: Lynne

hwcontext_vulkan: improve queue family init code

When users zero-init'd the struct, or left it as-is, the encode
queue family matched the graphics queue family, which led it to be
incorrectly logged as being used for encode.

This just improves the logging so this isn't printed anymore.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c258623c0a635d98e7e21123215446ebd2201b1e
---

 libavutil/hwcontext_vulkan.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index b955ec483e..4adcc0e839 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -1475,11 +1475,11 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
 
 av_free(qf);
 
-graph_index = hwctx->queue_family_index;
-comp_index  = hwctx->queue_family_comp_index;
-tx_index= hwctx->queue_family_tx_index;
-enc_index   = hwctx->queue_family_encode_index;
-dec_index   = hwctx->queue_family_decode_index;
+graph_index = hwctx->nb_graphics_queues ? hwctx->queue_family_index : -1;
+comp_index  = hwctx->nb_comp_queues ? hwctx->queue_family_comp_index : -1;
+tx_index= hwctx->nb_tx_queues ? hwctx->queue_family_tx_index : -1;
+dec_index   = hwctx->nb_decode_queues ? hwctx->queue_family_decode_index : 
-1;
+enc_index   = hwctx->nb_encode_queues ? hwctx->queue_family_encode_index : 
-1;
 
 #define CHECK_QUEUE(type, required, fidx, ctx_qf, qc)  
 \
 do {   
 \
@@ -1512,10 +1512,10 @@ static int vulkan_device_init(AVHWDeviceContext *ctx)
 } while (0)
 
 CHECK_QUEUE("graphics", 0, graph_index, hwctx->queue_family_index,
hwctx->nb_graphics_queues);
-CHECK_QUEUE("upload",   1, tx_index,hwctx->queue_family_tx_index, 
hwctx->nb_tx_queues);
 CHECK_QUEUE("compute",  1, comp_index,  hwctx->queue_family_comp_index,   
hwctx->nb_comp_queues);
-CHECK_QUEUE("encode",   0, enc_index,   hwctx->queue_family_encode_index, 
hwctx->nb_encode_queues);
+CHECK_QUEUE("upload",   1, tx_index,hwctx->queue_family_tx_index, 
hwctx->nb_tx_queues);
 CHECK_QUEUE("decode",   0, dec_index,   hwctx->queue_family_decode_index, 
hwctx->nb_decode_queues);
+CHECK_QUEUE("encode",   0, enc_index,   hwctx->queue_family_encode_index, 
hwctx->nb_encode_queues);
 
 #undef CHECK_QUEUE
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] configure: update copyright year

2023-10-19 Thread Lynne
ffmpeg | branch: release/2.8 | Lynne  | Sun Jan  1 00:00:00 2023 
+0100| [661a11fcc48efe0775812fc443d59e5c0a972fa1] | committer: Michael 
Niedermayer

configure: update copyright year

(cherry picked from commit 62da0b4a741a064f118a0eece496d6bcc437ec91)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=661a11fcc48efe0775812fc443d59e5c0a972fa1
---

 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index dd0a65e678..44b04ad49d 100755
--- a/configure
+++ b/configure
@@ -6144,7 +6144,7 @@ cat > $TMPH <https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] configure: disable libglslang/libshaderc if the vulkan is disabled

2023-10-15 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Oct 14 18:36:46 2023 
+0200| [fec6e84b18685abd2f8b3192bee560ff52c5220b] | committer: Lynne

configure: disable libglslang/libshaderc if the vulkan is disabled

Fixes build failures when the Vulkan headers are too old and libglslang
or libshaderc are enabled.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fec6e84b18685abd2f8b3192bee560ff52c5220b
---

 configure | 4 
 1 file changed, 4 insertions(+)

diff --git a/configure b/configure
index 7af1d943e5..d203177a74 100755
--- a/configure
+++ b/configure
@@ -7154,6 +7154,10 @@ if enabled vulkan; then
 check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) 
|| (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 255)"
 fi
 
+if disabled vulkan; then
+disable libglslang libshaderc spirv_compiler
+fi
+
 if enabled x86; then
 case $target_os in
 mingw32*|mingw64*|win32|win64|linux|cygwin*)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] nlmeans_vulkan: reduce dispatches by parallelizing the planes

2023-10-11 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Sep 16 01:04:18 2023 
+0200| [658b01b5ee53826855dad316a2f4a1e4f16ca05a] | committer: Lynne

nlmeans_vulkan: reduce dispatches by parallelizing the planes

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=658b01b5ee53826855dad316a2f4a1e4f16ca05a
---

 libavfilter/vf_nlmeans_vulkan.c | 33 +
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c
index 5b623eb7a6..9741dd67ac 100644
--- a/libavfilter/vf_nlmeans_vulkan.c
+++ b/libavfilter/vf_nlmeans_vulkan.c
@@ -538,28 +538,29 @@ static av_cold int init_denoise_pipeline(FFVulkanContext 
*vkctx, FFVkExecPool *e
 GLSLC(0, {
);
 GLSLC(1, ivec2 size;  
);
 GLSLC(1, const ivec2 pos = ivec2(gl_GlobalInvocationID.xy);   
);
+GLSLC(1, const uint plane = uint(gl_WorkGroupID.z);   
);
 GLSLC(0,  
);
 GLSLC(1, float w_sum; 
);
 GLSLC(1, float sum;   
);
 GLSLC(1, vec4 src;
);
 GLSLC(1, vec4 r;  
);
 GLSLC(0,  
);
-
-for (int i = 0; i < planes; i++) {
-GLSLF(1, src = texture(input_img[%i], pos); 
,i);
-for (int c = 0; c < desc->nb_components; c++) {
-if (desc->comp[c].plane == i) {
-int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 
8)/8);
-GLSLF(1, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x];  
 ,c, c);
-GLSLF(1, sum = sums_%i[pos.y*ws_stride[%i] + pos.x];   
 ,c, c);
-GLSLF(1, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255;
 ,off, off);
-GLSLC(0,   
  );
-}
-}
-GLSLF(1, imageStore(output_img[%i], pos, r);
,i);
-GLSLC(0,  
);
+GLSLC(1, size = imageSize(output_img[plane]); 
);
+GLSLC(1, if (!IS_WITHIN(pos, size))   
);
+GLSLC(2, return;  
);
+GLSLC(0,  
);
+GLSLC(1, src = texture(input_img[plane], pos);
);
+GLSLC(0,  
);
+for (int c = 0; c < desc->nb_components; c++) {
+int off = desc->comp[c].offset / (FFALIGN(desc->comp[c].depth, 8)/8);
+GLSLF(1, if (plane == %i) {
  ,desc->comp[c].plane);
+GLSLF(2, w_sum = weights_%i[pos.y*ws_stride[%i] + pos.x];  
 ,c, c);
+GLSLF(2, sum = sums_%i[pos.y*ws_stride[%i] + pos.x];   
 ,c, c);
+GLSLF(2, r[%i] = (sum + src[%i]*255) / (1.0 + w_sum) / 255;
 ,off, off);
+GLSLC(1, } 
  );
+GLSLC(0,   
  );
 }
-
+GLSLC(1, imageStore(output_img[plane], pos, r);   
);
 GLSLC(0, }
);
 
 RET(spv->compile_shader(spv, vkctx, shd, _data, _len, "main", 
_opaque));
@@ -716,7 +717,7 @@ static int denoise_pass(NLMeansVulkanContext *s, 
FFVkExecContext *exec,
 vk->CmdDispatch(exec->buf,
 FFALIGN(vkctx->output_width,  
s->pl_denoise.wg_size[0])/s->pl_denoise.wg_size[0],
 FFALIGN(vkctx->output_height, 
s->pl_denoise.wg_size[1])/s->pl_denoise.wg_size[1],
-1);
+av_pix_fmt_count_planes(s->vkctx.output_format));
 
 return 0;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] nlmeans_vulkan: parallelize workgroup invocations

2023-10-11 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Sep 15 21:55:59 2023 
+0200| [f31d0f11417067a3fc9d53085c32f4ba82b252e4] | committer: Lynne

nlmeans_vulkan: parallelize workgroup invocations

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f31d0f11417067a3fc9d53085c32f4ba82b252e4
---

 libavfilter/Makefile   |   3 +-
 libavfilter/vf_nlmeans_vulkan.c| 438 +++--
 libavfilter/vulkan/prefix_sum.comp | 151 -
 3 files changed, 224 insertions(+), 368 deletions(-)

diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 9a100cd665..603b532ad0 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -395,8 +395,7 @@ OBJS-$(CONFIG_MULTIPLY_FILTER)   += 
vf_multiply.o
 OBJS-$(CONFIG_NEGATE_FILTER) += vf_negate.o
 OBJS-$(CONFIG_NLMEANS_FILTER)+= vf_nlmeans.o
 OBJS-$(CONFIG_NLMEANS_OPENCL_FILTER) += vf_nlmeans_opencl.o opencl.o 
opencl/nlmeans.o
-OBJS-$(CONFIG_NLMEANS_VULKAN_FILTER) += vf_nlmeans_vulkan.o vulkan.o 
vulkan_filter.o \
-vulkan/prefix_sum.o
+OBJS-$(CONFIG_NLMEANS_VULKAN_FILTER) += vf_nlmeans_vulkan.o vulkan.o 
vulkan_filter.o
 OBJS-$(CONFIG_NNEDI_FILTER)  += vf_nnedi.o
 OBJS-$(CONFIG_NOFORMAT_FILTER)   += vf_format.o
 OBJS-$(CONFIG_NOISE_FILTER)  += vf_noise.o
diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c
index 9741dd67ac..2b8f97d7d9 100644
--- a/libavfilter/vf_nlmeans_vulkan.c
+++ b/libavfilter/vf_nlmeans_vulkan.c
@@ -38,9 +38,10 @@ typedef struct NLMeansVulkanContext {
 VkSampler sampler;
 
 AVBufferPool *integral_buf_pool;
-AVBufferPool *state_buf_pool;
 AVBufferPool *ws_buf_pool;
 
+FFVkBuffer xyoffsets_buf;
+
 int pl_weights_rows;
 FFVulkanPipeline pl_weights;
 FFVkSPIRVShader shd_weights;
@@ -66,107 +67,97 @@ typedef struct NLMeansVulkanContext {
 
 extern const char *ff_source_prefix_sum_comp;
 
-static void insert_first(FFVkSPIRVShader *shd, int r, int horiz, int plane, 
int comp)
+static void insert_first(FFVkSPIRVShader *shd, int r, const char *off, int 
horiz, int plane, int comp)
 {
-GLSLF(2, s1= texture(input_img[%i], ivec2(x + %i, y + %i))[%i];
-  ,plane, horiz ? r : 0, !horiz ? r : 0, comp);
-
-if (TYPE_ELEMS == 4) {
-GLSLF(2, s2[0] = texture(input_img[%i], ivec2(x + %i + xoffs[0], y + 
%i + yoffs[0]))[%i];
-  ,plane, horiz ? r : 0, !horiz ? r : 0, comp);
-GLSLF(2, s2[1] = texture(input_img[%i], ivec2(x + %i + xoffs[1], y + 
%i + yoffs[1]))[%i];
-  ,plane, horiz ? r : 0, !horiz ? r : 0, comp);
-GLSLF(2, s2[2] = texture(input_img[%i], ivec2(x + %i + xoffs[2], y + 
%i + yoffs[2]))[%i];
-  ,plane, horiz ? r : 0, !horiz ? r : 0, comp);
-GLSLF(2, s2[3] = texture(input_img[%i], ivec2(x + %i + xoffs[3], y + 
%i + yoffs[3]))[%i];
-  ,plane, horiz ? r : 0, !horiz ? r : 0, comp);
-} else {
-for (int i = 0; i < 16; i++) {
-GLSLF(2, s2[%i][%i] = texture(input_img[%i], ivec2(x + %i + 
xoffs[%i], y + %i + yoffs[%i]))[%i];
-  ,i / 4, i % 4, plane, horiz ? r : 0, i, !horiz ? r : 0, i, 
comp);
-}
-}
-
-GLSLC(2, s2 = (s1 - s2) * (s1 - s2);   
);
+GLSLF(4, s1= texture(input_img[%i], pos + ivec2(%i + %s, %i + %s))[%i];
+  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? 
off : "0", comp);
+
+GLSLF(4, s2[0] = texture(input_img[%i], pos + offs[0] + ivec2(%i + %s, %i 
+ %s))[%i];
+  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? 
off : "0", comp);
+GLSLF(4, s2[1] = texture(input_img[%i], pos + offs[1] + ivec2(%i + %s, %i 
+ %s))[%i];
+  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? 
off : "0", comp);
+GLSLF(4, s2[2] = texture(input_img[%i], pos + offs[2] + ivec2(%i + %s, %i 
+ %s))[%i];
+  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? 
off : "0", comp);
+GLSLF(4, s2[3] = texture(input_img[%i], pos + offs[3] + ivec2(%i + %s, %i 
+ %s))[%i];
+  ,plane, horiz ? r : 0, horiz ? off : "0", !horiz ? r : 0, !horiz ? 
off : "0", comp);
+
+GLSLC(4, s2 = (s1 - s2) * (s1 - s2);   
 );
 }
 
 static void insert_horizontal_pass(FFVkSPIRVShader *shd, int nb_rows, int 
first, int plane, int comp)
 {
-GLSLF(1, x = int(gl_GlobalInvocationID.x) * %i;   
,nb_rows);
-if (!first) {
-GLSLC(1, controlBarrier(gl_ScopeWorkgroup, gl_ScopeWorkgroup,
-gl_StorageSemanticsBuffer,
-gl_SemanticsAcquireRelease |
-gl_SemanticsMakeAvailabl

[FFmpeg-cvslog] nlmeans_vulkan: fix width/height for chroma plane weights calculation

2023-10-11 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Sep 16 00:42:53 2023 
+0200| [6bc8ff7d937cd98c8f1c855b0d10be525b45a5ce] | committer: Lynne

nlmeans_vulkan: fix width/height for chroma plane weights calculation

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6bc8ff7d937cd98c8f1c855b0d10be525b45a5ce
---

 libavfilter/vf_nlmeans_vulkan.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c
index 99f4f867e7..5b623eb7a6 100644
--- a/libavfilter/vf_nlmeans_vulkan.c
+++ b/libavfilter/vf_nlmeans_vulkan.c
@@ -100,7 +100,7 @@ static void insert_horizontal_pass(FFVkSPIRVShader *shd, 
int nb_rows, int first,
 gl_SemanticsMakeAvailable |
 gl_SemanticsMakeVisible); 
);
 }
-GLSLC(1, for (y = 0; y < height[0]; y++) {
);
+GLSLF(1, for (y = 0; y < height[%i]; y++) {   
,plane);
 GLSLC(2, offset = uint64_t(int_stride)*y*T_ALIGN; 
);
 GLSLC(2, dst = DataBuffer(uint64_t(integral_data) + offset);  
);
 GLSLC(0,  
);
@@ -127,7 +127,7 @@ static void insert_vertical_pass(FFVkSPIRVShader *shd, int 
nb_rows, int first, i
 gl_SemanticsMakeAvailable |
 gl_SemanticsMakeVisible); 
);
 }
-GLSLC(1, for (x = 0; x < width[0]; x++) { 
);
+GLSLF(1, for (x = 0; x < width[%i]; x++) {
,plane);
 GLSLC(2, dst = DataBuffer(uint64_t(integral_data) + x*T_ALIGN);   
);
 
 for (int r = 0; r < nb_rows; r++) {
@@ -156,13 +156,13 @@ static void insert_weights_pass(FFVkSPIRVShader *shd, int 
nb_rows, int vert,
 gl_SemanticsMakeVisible); 
);
 GLSLC(1, barrier();   
);
 if (!vert) {
-GLSLC(1, for (y = 0; y < height[0]; y++) {
);
+GLSLF(1, for (y = 0; y < height[%i]; y++) {   
,plane);
 GLSLF(2, if (gl_GlobalInvocationID.x*%i >= width[%i]) 
,nb_rows, plane);
 GLSLC(3, break;   
);
 GLSLF(2, for (r = 0; r < %i; r++) {   
,nb_rows);
 GLSLF(3, x = int(gl_GlobalInvocationID.x) * %i + r;   
,nb_rows);
 } else {
-GLSLC(1, for (x = 0; x < width[0]; x++) { 
);
+GLSLF(1, for (x = 0; x < width[%i]; x++) {
,plane);
 GLSLF(2, if (gl_GlobalInvocationID.x*%i >= height[%i])
,nb_rows, plane);
 GLSLC(3, break;   
);
 GLSLF(2, for (r = 0; r < %i; r++) {   
,nb_rows);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] hwcontext_vulkan: properly support STORAGE usage for multiplane images

2023-10-05 Thread Lynne
ffmpeg | branch: master | Lynne  | Thu Oct  5 20:25:33 2023 
+0200| [81cc0e13455baa5e6547df40dc5961d415d6fcd2] | committer: Lynne

hwcontext_vulkan: properly support STORAGE usage for multiplane images

Fixes multiplane support on Nvidia.

Also, remove the ENCODE usage, even if the driver signals it as supported.
Currently, it's not used, and when it is used, it'll be gated behind
two extension checks.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=81cc0e13455baa5e6547df40dc5961d415d6fcd2
---

 libavfilter/vulkan_filter.c  | 24 +++-
 libavutil/hwcontext_vulkan.c | 36 
 2 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/libavfilter/vulkan_filter.c b/libavfilter/vulkan_filter.c
index b4d8f952b5..f3f40465be 100644
--- a/libavfilter/vulkan_filter.c
+++ b/libavfilter/vulkan_filter.c
@@ -42,18 +42,23 @@ int ff_vk_filter_init_context(AVFilterContext *avctx, 
FFVulkanContext *s,
 vk_frames = frames_ctx->hwctx;
 vk_dev = device_ctx->hwctx;
 
-/* Basic format validation */
+/* Width and height mismatch */
 if (width != frames_ctx->width ||
-height != frames_ctx->height ||
-sw_format != frames_ctx->sw_format ||
-(vk_frames->tiling != VK_IMAGE_TILING_LINEAR &&
- vk_frames->tiling != VK_IMAGE_TILING_OPTIMAL) ||
-!(vk_frames->usage & VK_IMAGE_USAGE_SAMPLED_BIT)) {
+height != frames_ctx->height)
+goto skip;
+
+/* Format mismatch */
+if (sw_format != frames_ctx->sw_format)
 goto skip;
-}
 
-if (vk_frames->usage & VK_IMAGE_USAGE_STORAGE_BIT)
-goto accept;
+/* Unusual tiling mismatch. Don't let linear through either. */
+if (vk_frames->tiling != VK_IMAGE_TILING_OPTIMAL)
+goto skip;
+
+/* Usage mismatch */
+if ((vk_frames->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | 
VK_IMAGE_USAGE_STORAGE_BIT)) !=
+(VK_IMAGE_USAGE_SAMPLED_BIT | 
VK_IMAGE_USAGE_STORAGE_BIT))
+goto skip;
 
 s->extensions = 
ff_vk_extensions_to_mask(vk_dev->enabled_dev_extensions,
  
vk_dev->nb_enabled_dev_extensions);
@@ -110,6 +115,7 @@ accept:
 vk_frames = frames_ctx->hwctx;
 vk_frames->tiling = VK_IMAGE_TILING_OPTIMAL;
 vk_frames->usage  = VK_IMAGE_USAGE_SAMPLED_BIT |
+VK_IMAGE_USAGE_STORAGE_BIT |
 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
 VK_IMAGE_USAGE_TRANSFER_DST_BIT;
 
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index c676f4fc57..b955ec483e 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -282,9 +282,11 @@ FN_MAP_TO(VkImageUsageFlags, usage, 
VkFormatFeatureFlagBits2, feats)
 
 static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, enum AVPixelFormat p,
   VkImageTiling tiling,
-  VkFormat fmts[AV_NUM_DATA_POINTERS],
-  int *nb_images, VkImageAspectFlags *aspect,
-  VkImageUsageFlags *supported_usage, int 
disable_multiplane)
+  VkFormat fmts[AV_NUM_DATA_POINTERS], /* Output 
format list */
+  int *nb_images,  /* Output 
number of images */
+  VkImageAspectFlags *aspect,  /* Output 
aspect */
+  VkImageUsageFlags *supported_usage,  /* Output 
supported usage */
+  int disable_multiplane, int need_storage)
 {
 AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
 VulkanDevicePriv *priv = dev_ctx->internal->priv;
@@ -301,6 +303,7 @@ static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, 
enum AVPixelFormat p,
 };
 VkFormatFeatureFlagBits2 feats_primary, feats_secondary;
 int basics_primary = 0, basics_secondary = 0;
+int storage_primary = 0, storage_secondary = 0;
 
 vk->GetPhysicalDeviceFormatProperties2(hwctx->phys_dev,
vk_formats_list[i].vkf,
@@ -310,6 +313,7 @@ static int vkfmt_from_pixfmt2(AVHWDeviceContext *dev_ctx, 
enum AVPixelFormat p,
  prop.formatProperties.linearTilingFeatures :
  prop.formatProperties.optimalTilingFeatures;
 basics_primary = (feats_primary & basic_flags) == basic_flags;
+storage_primary = !!(feats_primary & 
VK_FORMAT_FEATURE_2_STORAGE_IMAGE_BIT);
 
 if (vk_formats_list[i].vkf != vk_formats_list[i].fallback[0]) {
 vk->GetPhysicalDeviceFormatProperties2(hwctx->ph

[FFmpeg-cvslog] vulkan_decode: don't call get_proc_addr on every frame's destruction

2023-09-15 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Sep 15 02:22:00 2023 
+0200| [9310ffc809d02d7bbc767555c2ed16311623ffe2] | committer: Lynne

vulkan_decode: don't call get_proc_addr on every frame's destruction

The issue is that we cannot rely on any context existing when we free
frames. The Vulkan functions are loaded in each context separately,
so until now, we've just been loading them on every frame's destruction.

Rather than do this, just save the function pointers we need in each
frame. The function pointers are guaranteed to not change and exist.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9310ffc809d02d7bbc767555c2ed16311623ffe2
---

 libavcodec/vulkan_decode.c | 18 +++---
 libavcodec/vulkan_decode.h |  4 
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 21bebb1677..ef4a1c3809 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -176,6 +176,7 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, 
AVFrame *pic,
 {
 int err;
 FFVulkanDecodeShared *ctx = (FFVulkanDecodeShared *)dec->shared_ref->data;
+FFVulkanFunctions *vk = &ctx->s.vkfn;
 
 vkpic->slices_size = 0;
 
@@ -189,6 +190,9 @@ int ff_vk_decode_prepare_frame(FFVulkanDecodeContext *dec, 
AVFrame *pic,
 vkpic->img_view_out  = NULL;
 vkpic->img_view_dest = NULL;
 
+vkpic->destroy_image_view = vk->DestroyImageView;
+vkpic->wait_semaphores = vk->WaitSemaphores;
+
 if (dec->layered_dpb && alloc_dpb) {
 vkpic->img_view_ref = ctx->layered_view;
 vkpic->img_aspect_ref = ctx->layered_aspect;
@@ -554,9 +558,6 @@ int ff_vk_decode_frame(AVCodecContext *avctx,
 void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, FFVulkanDecodePicture 
*vp)
 {
 AVVulkanDeviceContext *hwctx = dev_ctx->hwctx;
-PFN_vkGetDeviceProcAddr device_proc_addr;
-PFN_vkWaitSemaphores wait_semaphores;
-PFN_vkDestroyImageView destroy_image_view;
 
 VkSemaphoreWaitInfo sem_wait = (VkSemaphoreWaitInfo) {
 .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
@@ -565,27 +566,22 @@ void ff_vk_decode_free_frame(AVHWDeviceContext *dev_ctx, 
FFVulkanDecodePicture *
 .semaphoreCount = 1,
 };
 
-/* Guaranteed to exist */
-device_proc_addr = 
(PFN_vkGetDeviceProcAddr)hwctx->get_proc_addr(hwctx->inst, 
"vkGetDeviceProcAddr");
-destroy_image_view = 
(PFN_vkDestroyImageView)device_proc_addr(hwctx->act_dev, "vkDestroyImageView");
-wait_semaphores = (PFN_vkWaitSemaphores)device_proc_addr(hwctx->act_dev, 
"vkWaitSemaphores");
-
 /* We do not have to lock the frame here because we're not interested
  * in the actual current semaphore value, but only that it's later than
  * the time we submitted the image for decoding. */
 if (vp->sem)
-wait_semaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
+vp->wait_semaphores(hwctx->act_dev, &sem_wait, UINT64_MAX);
 
 /* Free slices data */
 av_buffer_unref(&vp->slices_buf);
 
 /* Destroy image view (out) */
 if (vp->img_view_out && vp->img_view_out != vp->img_view_dest)
-destroy_image_view(hwctx->act_dev, vp->img_view_out, hwctx->alloc);
+vp->destroy_image_view(hwctx->act_dev, vp->img_view_out, hwctx->alloc);
 
 /* Destroy image view (ref, unlayered) */
 if (vp->img_view_dest)
-destroy_image_view(hwctx->act_dev, vp->img_view_dest, hwctx->alloc);
+vp->destroy_image_view(hwctx->act_dev, vp->img_view_dest, 
hwctx->alloc);
 
 av_frame_free(&vp->dpb_frame);
 }
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index 71ba3dbd84..c983b44029 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -97,6 +97,10 @@ typedef struct FFVulkanDecodePicture {
 /* Slice data */
 AVBufferRef*slices_buf;
 size_t  slices_size;
+
+/* Vulkan functions needed for destruction, as no other context is 
guaranteed to exist */
+PFN_vkWaitSemaphoreswait_semaphores;
+PFN_vkDestroyImageView  destroy_image_view;
 } FFVulkanDecodePicture;
 
 /**

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan_hevc: switch from a buffer pool to a malloc and simplify

2023-09-15 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Sep 15 01:51:49 2023 
+0200| [552a5fa496933c2679cac6774e483bee3f5c2c53] | committer: Lynne

vulkan_hevc: switch from a buffer pool to a malloc and simplify

Simpler and more robust now that contexts are not shared between threads.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=552a5fa496933c2679cac6774e483bee3f5c2c53
---

 libavcodec/vulkan_decode.c |  2 +-
 libavcodec/vulkan_decode.h |  4 ++--
 libavcodec/vulkan_hevc.c   | 52 +++---
 3 files changed, 20 insertions(+), 38 deletions(-)

diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 534a76edda..21bebb1677 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -1104,7 +1104,7 @@ int ff_vk_decode_uninit(AVCodecContext *avctx)
 /* Wait on and free execution pool */
 ff_vk_exec_pool_free(>s, >exec_pool);
 
-av_buffer_pool_uninit(>tmp_pool);
+av_freep(>hevc_headers);
 av_buffer_unref(>session_params);
 av_buffer_unref(>shared_ref);
 av_freep(>slice_off);
diff --git a/libavcodec/vulkan_decode.h b/libavcodec/vulkan_decode.h
index abf08a98bf..71ba3dbd84 100644
--- a/libavcodec/vulkan_decode.h
+++ b/libavcodec/vulkan_decode.h
@@ -64,8 +64,8 @@ typedef struct FFVulkanDecodeContext {
 uint32_t frame_id_alloc_mask; /* For AV1 only */
 
 /* Thread-local state below */
-AVBufferPool *tmp_pool; /* Pool for temporary data, if needed (HEVC) */
-size_t tmp_pool_ele_size;
+struct HEVCHeaderSet *hevc_headers;
+size_t hevc_headers_size;
 
 uint32_t   *slice_off;
 unsigned intslice_off_max;
diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c
index ef371bda67..52f223ceb2 100644
--- a/libavcodec/vulkan_hevc.c
+++ b/libavcodec/vulkan_hevc.c
@@ -68,49 +68,33 @@ typedef struct HEVCHeaderSet {
 HEVCHeaderVPS *hvps;
 } HEVCHeaderSet;
 
-static int get_data_set_buf(FFVulkanDecodeContext *s, AVBufferRef **data_buf,
-int nb_vps, AVBufferRef * const 
vps_list[HEVC_MAX_VPS_COUNT])
+static int alloc_hevc_header_structs(FFVulkanDecodeContext *s,
+ int nb_vps,
+ AVBufferRef * const 
vps_list[HEVC_MAX_VPS_COUNT])
 {
 uint8_t *data_ptr;
 HEVCHeaderSet *hdr;
 
-size_t base_size = 
sizeof(StdVideoH265SequenceParameterSet)*HEVC_MAX_SPS_COUNT +
-   sizeof(HEVCHeaderSPS)*HEVC_MAX_SPS_COUNT +
-   
sizeof(StdVideoH265PictureParameterSet)*HEVC_MAX_PPS_COUNT +
-   sizeof(HEVCHeaderPPS)*HEVC_MAX_PPS_COUNT +
-   
sizeof(StdVideoH265VideoParameterSet)*HEVC_MAX_VPS_COUNT +
-   sizeof(HEVCHeaderVPS *);
-
-size_t vps_size = sizeof(StdVideoH265ProfileTierLevel) +
-  sizeof(StdVideoH265DecPicBufMgr) +
-  sizeof(StdVideoH265HrdParameters)*HEVC_MAX_LAYER_SETS +
-  sizeof(HEVCHeaderVPSSet *);
-
-size_t buf_size = base_size + vps_size*nb_vps;
-
+size_t buf_size = sizeof(HEVCHeaderSet) + nb_vps*sizeof(HEVCHeaderVPS);
 for (int i = 0; i < nb_vps; i++) {
 const HEVCVPS *vps = (const HEVCVPS *)vps_list[i]->data;
 buf_size += sizeof(HEVCHeaderVPSSet)*vps->vps_num_hrd_parameters;
 }
 
-if (buf_size > s->tmp_pool_ele_size) {
-av_buffer_pool_uninit(&s->tmp_pool);
-s->tmp_pool_ele_size = 0;
-s->tmp_pool = av_buffer_pool_init(buf_size, NULL);
-if (!s->tmp_pool)
+if (buf_size > s->hevc_headers_size) {
+av_freep(&s->hevc_headers);
+s->hevc_headers_size = 0;
+s->hevc_headers = av_mallocz(buf_size);
+if (!s->hevc_headers)
 return AVERROR(ENOMEM);
-s->tmp_pool_ele_size = buf_size;
+s->hevc_headers_size = buf_size;
 }
 
-*data_buf = av_buffer_pool_get(s->tmp_pool);
-if (!(*data_buf))
-return AVERROR(ENOMEM);
-
-/* Setup pointers */
-data_ptr = (*data_buf)->data;
-hdr = (HEVCHeaderSet *)data_ptr;
-hdr->hvps = (HEVCHeaderVPS *)(data_ptr + base_size);
-data_ptr += base_size + vps_size*nb_vps;
+/* Setup struct pointers */
+hdr = s->hevc_headers;
+data_ptr = (uint8_t *)hdr;
+hdr->hvps = (HEVCHeaderVPS *)(data_ptr + sizeof(HEVCHeaderSet));
+data_ptr += sizeof(HEVCHeaderSet) + nb_vps*sizeof(HEVCHeaderVPS);
 for (int i = 0; i < nb_vps; i++) {
 const HEVCVPS *vps = (const HEVCVPS *)vps_list[i]->data;
 hdr->hvps[i].sls = (HEVCHeaderVPSSet *)data_ptr;
@@ -672,17 +656,16 @@ static int vk_hevc_create_params(AVCodecContext *avctx, 
AVBufferRef **buf)
 };
 
 int nb_vps = 0;
-AVBufferRef *data_set;
 HEVCHeaderSet *hdr;
 
 for (int i = 0; h->ps.vps_list[i]; i++)
 nb_vps++;
 
-err = ge

[FFmpeg-cvslog] lavu/tx: add missing prints for the type of dctI/dstI

2023-09-07 Thread Lynne
ffmpeg | branch: master | Lynne  | Sun Sep  3 16:47:53 2023 
+0200| [00e77fd21a0327404f61bab19c50d73a7bec5007] | committer: Lynne

lavu/tx: add missing prints for the type of dctI/dstI

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=00e77fd21a0327404f61bab19c50d73a7bec5007
---

 libavutil/tx.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/libavutil/tx.c b/libavutil/tx.c
index e9826e6107..24b2015b44 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -578,12 +578,18 @@ static void print_type(AVBPrint *bp, enum AVTXType type)
type == AV_TX_FLOAT_FFT   ? "fft_float"   :
type == AV_TX_FLOAT_MDCT  ? "mdct_float"  :
type == AV_TX_FLOAT_RDFT  ? "rdft_float"  :
+   type == AV_TX_FLOAT_DCT_I ? "dctI_float"  :
+   type == AV_TX_FLOAT_DST_I ? "dstI_float"  :
type == AV_TX_DOUBLE_FFT  ? "fft_double"  :
type == AV_TX_DOUBLE_MDCT ? "mdct_double" :
type == AV_TX_DOUBLE_RDFT ? "rdft_double" :
+   type == AV_TX_DOUBLE_DCT_I ? "dctI_double" :
+   type == AV_TX_DOUBLE_DST_I ? "dstI_double" :
type == AV_TX_INT32_FFT   ? "fft_int32"   :
type == AV_TX_INT32_MDCT  ? "mdct_int32"  :
type == AV_TX_INT32_RDFT  ? "rdft_int32"  :
+   type == AV_TX_INT32_DCT_I ? "dctI_int32" :
+   type == AV_TX_INT32_DST_I ? "dstI_int32" :
"unknown");
 }
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan_decode: convert max level from vulkan to av for comparisons

2023-09-07 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Sep  6 06:15:32 2023 
+0200| [398467f519b3b4e954ac9e6868358c052b407fc5] | committer: Lynne

vulkan_decode: convert max level from vulkan to av for comparisons

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=398467f519b3b4e954ac9e6868358c052b407fc5
---

 libavcodec/vulkan_decode.c |  4 ++--
 libavcodec/vulkan_video.c  | 45 +
 libavcodec/vulkan_video.h  |  7 +++
 3 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/libavcodec/vulkan_decode.c b/libavcodec/vulkan_decode.c
index 9a57369df7..3986330c81 100644
--- a/libavcodec/vulkan_decode.c
+++ b/libavcodec/vulkan_decode.c
@@ -837,8 +837,8 @@ static int vulkan_decode_get_profile(AVCodecContext *avctx, 
AVBufferRef *frames_
 return AVERROR_EXTERNAL;
 }
 
-max_level = avctx->codec_id == AV_CODEC_ID_H264 ? h264_caps.maxLevelIdc :
-avctx->codec_id == AV_CODEC_ID_H265 ? h265_caps.maxLevelIdc :
+max_level = avctx->codec_id == AV_CODEC_ID_H264 ? 
ff_vk_h264_level_to_av(h264_caps.maxLevelIdc) :
+avctx->codec_id == AV_CODEC_ID_H265 ? 
ff_vk_h265_level_to_av(h265_caps.maxLevelIdc) :
 avctx->codec_id == AV_CODEC_ID_AV1  ? av1_caps.maxLevelIdc  :
 0;
 
diff --git a/libavcodec/vulkan_video.c b/libavcodec/vulkan_video.c
index 9a363aab02..236aa124bb 100644
--- a/libavcodec/vulkan_video.c
+++ b/libavcodec/vulkan_video.c
@@ -154,6 +154,51 @@ VkVideoComponentBitDepthFlagBitsKHR 
ff_vk_depth_from_av_depth(int depth)
 return VK_VIDEO_COMPONENT_BIT_DEPTH_INVALID_KHR;
 }
 
+int ff_vk_h264_level_to_av(StdVideoH264LevelIdc level)
+{
+switch (level) {
+case STD_VIDEO_H264_LEVEL_IDC_1_0: return 10;
+case STD_VIDEO_H264_LEVEL_IDC_1_1: return 11;
+case STD_VIDEO_H264_LEVEL_IDC_1_2: return 12;
+case STD_VIDEO_H264_LEVEL_IDC_1_3: return 13;
+case STD_VIDEO_H264_LEVEL_IDC_2_0: return 20;
+case STD_VIDEO_H264_LEVEL_IDC_2_1: return 21;
+case STD_VIDEO_H264_LEVEL_IDC_2_2: return 22;
+case STD_VIDEO_H264_LEVEL_IDC_3_0: return 30;
+case STD_VIDEO_H264_LEVEL_IDC_3_1: return 31;
+case STD_VIDEO_H264_LEVEL_IDC_3_2: return 32;
+case STD_VIDEO_H264_LEVEL_IDC_4_0: return 40;
+case STD_VIDEO_H264_LEVEL_IDC_4_1: return 41;
+case STD_VIDEO_H264_LEVEL_IDC_4_2: return 42;
+case STD_VIDEO_H264_LEVEL_IDC_5_0: return 50;
+case STD_VIDEO_H264_LEVEL_IDC_5_1: return 51;
+case STD_VIDEO_H264_LEVEL_IDC_5_2: return 52;
+case STD_VIDEO_H264_LEVEL_IDC_6_0: return 60;
+case STD_VIDEO_H264_LEVEL_IDC_6_1: return 61;
+default:
+case STD_VIDEO_H264_LEVEL_IDC_6_2: return 62;
+}
+}
+
+int ff_vk_h265_level_to_av(StdVideoH265LevelIdc level)
+{
+switch (level) {
+case STD_VIDEO_H265_LEVEL_IDC_1_0: return 10;
+case STD_VIDEO_H265_LEVEL_IDC_2_0: return 20;
+case STD_VIDEO_H265_LEVEL_IDC_2_1: return 21;
+case STD_VIDEO_H265_LEVEL_IDC_3_0: return 30;
+case STD_VIDEO_H265_LEVEL_IDC_3_1: return 31;
+case STD_VIDEO_H265_LEVEL_IDC_4_0: return 40;
+case STD_VIDEO_H265_LEVEL_IDC_4_1: return 41;
+case STD_VIDEO_H265_LEVEL_IDC_5_0: return 50;
+case STD_VIDEO_H265_LEVEL_IDC_5_1: return 51;
+case STD_VIDEO_H265_LEVEL_IDC_6_0: return 60;
+case STD_VIDEO_H265_LEVEL_IDC_6_1: return 61;
+default:
+case STD_VIDEO_H265_LEVEL_IDC_6_2: return 62;
+}
+}
+
 static void free_data_buf(void *opaque, uint8_t *data)
 {
 FFVulkanContext *ctx = opaque;
diff --git a/libavcodec/vulkan_video.h b/libavcodec/vulkan_video.h
index 183ce89bf0..b28e3fe0bd 100644
--- a/libavcodec/vulkan_video.h
+++ b/libavcodec/vulkan_video.h
@@ -71,6 +71,13 @@ VkVideoChromaSubsamplingFlagBitsKHR 
ff_vk_subsampling_from_av_desc(const AVPixFm
  */
 VkVideoComponentBitDepthFlagBitsKHR ff_vk_depth_from_av_depth(int depth);
 
+
+/**
+ * Convert level from Vulkan to AV.
+ */
+int ff_vk_h264_level_to_av(StdVideoH264LevelIdc level);
+int ff_vk_h265_level_to_av(StdVideoH265LevelIdc level);
+
 typedef struct FFVkVideoBuffer {
 FFVkBuffer buf;
 uint8_t *mem;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavu/tx: fix scaling of R2R transforms

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Sep  1 06:13:51 2023 
+0200| [d40672e661037c770f50e45c8c09f523ed9a77d0] | committer: Lynne

lavu/tx: fix scaling of R2R transforms

Still slightly inaccurate, but it's good enough now.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d40672e661037c770f50e45c8c09f523ed9a77d0
---

 libavutil/tx_template.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 4745b56700..6e3b3dad33 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -1613,6 +1613,7 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext 
*s,
 int ret;
 double f, m;
 TXSample *tab;
+uint64_t r2r = flags & AV_TX_REAL_TO_REAL;
 int len4 = FFALIGN(len, 4) / 4;
 
 s->scale_d = *((SCALE_TYPE *)scale);
@@ -1638,7 +1639,10 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext 
*s,
 *tab++ = RESCALE(-m);
 
 *tab++ = RESCALE( (0.5 - 0.0) * m);
-*tab++ = RESCALE( (0.0 - 0.5) * m);
+if (r2r)
+*tab++ = 1 / s->scale_f;
+else
+*tab++ = RESCALE( (0.0 - 0.5) * m);
 *tab++ = RESCALE( (0.5 - inv) * m);
 *tab++ = RESCALE(-(0.5 - inv) * m);
 
@@ -1804,7 +1808,7 @@ static void TX_NAME(ff_tx_rdft_ ##n)(AVTXContext *s, void 
*_dst,   \
 if (mode == AV_TX_REAL_TO_REAL) {  
\
 out[len2] = tmp_dc;
\
 if (mod2)  
\
-out[len4 + 1] = tmp_mid;   
\
+out[len4 + 1] = tmp_mid * fact[5]; 
\
 } else if (mod2) { 
\
 out[len4] = tmp_mid;   
\
 }  
\

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavc/avfft: deprecate the API

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Mon Jul 24 23:55:55 2023 
+0200| [139e54911c8356729e5cfad4283da9abb90b53e0] | committer: Lynne

lavc/avfft: deprecate the API

This deprecates the currently unused API.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=139e54911c8356729e5cfad4283da9abb90b53e0
---

 doc/APIchanges |  5 +
 libavcodec/avfft.h | 31 +++
 libavcodec/tests/fft.c |  6 ++
 libavcodec/version.h   |  2 +-
 libavcodec/version_major.h |  2 ++
 5 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index db3242b667..9032164d93 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -2,6 +2,11 @@ The last version increases of all libraries were on 2023-02-09
 
 API changes, most recent first:
 
+2023-09-xx - xx - lavc 60.25.100 - avfft.h
+  The entire header will be deprecated and removed in two major bumps.
+  For a replacement to av_dct, av_rdft, av_fft and av_mdct, use
+  the new API from libavutil/tx.h.
+
 2023-07-xx - xx - lavu 58.18.100 - tx.h
   Add AV_TX_REAL_TO_REAL and AV_TX_REAL_TO_IMAGINARY
 
diff --git a/libavcodec/avfft.h b/libavcodec/avfft.h
index 0c0f9b8d8d..e3a0da1eb9 100644
--- a/libavcodec/avfft.h
+++ b/libavcodec/avfft.h
@@ -19,6 +19,10 @@
 #ifndef AVCODEC_AVFFT_H
 #define AVCODEC_AVFFT_H
 
+#include "libavutil/attributes.h"
+#include "version_major.h"
+#if FF_API_AVFFT
+
 /**
  * @file
  * @ingroup lavc_fft
@@ -44,26 +48,42 @@ typedef struct FFTContext FFTContext;
  * Set up a complex FFT.
  * @param nbits   log2 of the length of the input array
  * @param inverse if 0 perform the forward transform, if 1 perform the 
inverse
+ * @deprecated use av_tx_init from libavutil/tx.h with a type of 
AV_TX_FLOAT_FFT
  */
+attribute_deprecated
 FFTContext *av_fft_init(int nbits, int inverse);
 
 /**
  * Do the permutation needed BEFORE calling ff_fft_calc().
+ * @deprecated without replacement
  */
+attribute_deprecated
 void av_fft_permute(FFTContext *s, FFTComplex *z);
 
 /**
  * Do a complex FFT with the parameters defined in av_fft_init(). The
  * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ * @deprecated use the av_tx_fn value returned by av_tx_init, which also does 
permutation
  */
+attribute_deprecated
 void av_fft_calc(FFTContext *s, FFTComplex *z);
 
+attribute_deprecated
 void av_fft_end(FFTContext *s);
 
+/**
+ * @deprecated use av_tx_init from libavutil/tx.h with a type of 
AV_TX_FLOAT_MDCT,
+ * with a flag of AV_TX_FULL_IMDCT for a replacement to av_imdct_calc.
+ */
+attribute_deprecated
 FFTContext *av_mdct_init(int nbits, int inverse, double scale);
+attribute_deprecated
 void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+attribute_deprecated
 void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input);
+attribute_deprecated
 void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+attribute_deprecated
 void av_mdct_end(FFTContext *s);
 
 /* Real Discrete Fourier Transform */
@@ -81,9 +101,14 @@ typedef struct RDFTContext RDFTContext;
  * Set up a real FFT.
  * @param nbits   log2 of the length of the input array
  * @param trans   the type of transform
+ *
+ * @deprecated use av_tx_init from libavutil/tx.h with a type of 
AV_TX_FLOAT_RDFT
  */
+attribute_deprecated
 RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans);
+attribute_deprecated
 void av_rdft_calc(RDFTContext *s, FFTSample *data);
+attribute_deprecated
 void av_rdft_end(RDFTContext *s);
 
 /* Discrete Cosine Transform */
@@ -106,13 +131,19 @@ enum DCTTransformType {
  * @param typethe type of transform
  *
  * @note the first element of the input of DST-I is ignored
+ *
+ * @deprecated use av_tx_init from libavutil/tx.h with an appropriate type of 
AV_TX_FLOAT_DCT
  */
+attribute_deprecated
 DCTContext *av_dct_init(int nbits, enum DCTTransformType type);
+attribute_deprecated
 void av_dct_calc(DCTContext *s, FFTSample *data);
+attribute_deprecated
 void av_dct_end (DCTContext *s);
 
 /**
  * @}
  */
 
+#endif /* FF_API_AVFFT */
 #endif /* AVCODEC_AVFFT_H */
diff --git a/libavcodec/tests/fft.c b/libavcodec/tests/fft.c
index 163f3e89c4..0f03c9232d 100644
--- a/libavcodec/tests/fft.c
+++ b/libavcodec/tests/fft.c
@@ -18,6 +18,10 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/internal.h"
+
+FF_DISABLE_DEPRECATION_WARNINGS
+
 /**
  * @file
  * FFT and MDCT tests.
@@ -675,3 +679,5 @@ cleanup:
 
 return !!err;
 }
+
+FF_ENABLE_DEPRECATION_WARNINGS
diff --git a/libavcodec/version.h b/libavcodec/version.h
index e0fe2eb7b8..a744e7469f 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -29,7 +29,7 @@
 
 #include "version_major.h"
 
-#define LIBAVCODEC_VERSION_MINOR  24
+#define LIBAVCODEC_VERSION_MINOR  25
 #def

[FFmpeg-cvslog] avfft: wrap lavu/tx instead of ff_mdct

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Thu Nov 10 11:26:33 2022 
+0100| [517e4fcca6b81e73536d4ef5edfd22ad5886e783] | committer: Lynne

avfft: wrap lavu/tx instead of ff_mdct

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=517e4fcca6b81e73536d4ef5edfd22ad5886e783
---

 libavcodec/avfft.c | 43 ++-
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index e4b19af272..107b510828 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -75,43 +75,60 @@ av_cold void av_fft_end(FFTContext *s)
 }
 }
 
-#if CONFIG_MDCT
-
 FFTContext *av_mdct_init(int nbits, int inverse, double scale)
 {
-FFTContext *s = av_malloc(sizeof(*s));
+int ret;
+float scale_f = scale;
+AVTXWrapper *s = av_malloc(sizeof(*s));
+if (!s)
+return NULL;
 
-if (s && ff_mdct_init(s, nbits, inverse, scale))
-av_freep(&s);
+ret = av_tx_init(&s->ctx, &s->fn, AV_TX_FLOAT_MDCT, inverse, 1 << (nbits - 
1), &scale_f, 0);
+if (ret < 0) {
+av_free(s);
+return NULL;
+}
 
-return s;
+if (inverse) {
+ret = av_tx_init(&s->ctx2, &s->fn2, AV_TX_FLOAT_MDCT, inverse, 1 << 
(nbits - 1),
+ &scale_f, AV_TX_FULL_IMDCT);
+if (ret < 0) {
+av_tx_uninit(&s->ctx);
+av_free(s);
+return NULL;
+}
+}
+
+return (FFTContext *)s;
 }
 
 void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
 {
-s->imdct_calc(s, output, input);
+AVTXWrapper *w = (AVTXWrapper *)s;
+w->fn2(w->ctx2, output, (void *)input, sizeof(float));
 }
 
 void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
 {
-s->imdct_half(s, output, input);
+AVTXWrapper *w = (AVTXWrapper *)s;
+w->fn(w->ctx, output, (void *)input, sizeof(float));
 }
 
 void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
 {
-s->mdct_calc(s, output, input);
+AVTXWrapper *w = (AVTXWrapper *)s;
+w->fn(w->ctx, output, (void *)input, sizeof(float));
 }
 
 av_cold void av_mdct_end(FFTContext *s)
 {
 if (s) {
-ff_mdct_end(s);
-av_free(s);
+AVTXWrapper *w = (AVTXWrapper *)s;
+av_tx_uninit(&w->ctx);
+av_free(w);
 }
 }
 
-#endif /* CONFIG_MDCT */
-
 #if CONFIG_RDFT
 
 RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avfft: wrap lavu/tx instead of ff_fft

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Thu Nov 10 11:23:38 2022 
+0100| [dfcd4bbf5118b9fa601ba36e3c5984db61478d5f] | committer: Lynne

avfft: wrap lavu/tx instead of ff_fft

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=dfcd4bbf5118b9fa601ba36e3c5984db61478d5f
---

 libavcodec/avfft.c | 40 +++-
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 2200f37708..e4b19af272 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -18,38 +18,60 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/mem.h"
+#include "libavutil/tx.h"
 #include "avfft.h"
 #include "fft.h"
 #include "rdft.h"
 #include "dct.h"
 
+typedef struct AVTXWrapper {
+AVTXContext *ctx;
+av_tx_fn fn;
+
+AVTXContext *ctx2;
+av_tx_fn fn2;
+
+ptrdiff_t stride;
+} AVTXWrapper;
+
 /* FFT */
 
 FFTContext *av_fft_init(int nbits, int inverse)
 {
-FFTContext *s = av_mallocz(sizeof(*s));
-
-if (s && ff_fft_init(s, nbits, inverse))
-av_freep(&s);
+int ret;
+float scale = 1.0f;
+AVTXWrapper *s = av_malloc(sizeof(*s));
+if (!s)
+return NULL;
+
+ret = av_tx_init(&s->ctx, &s->fn, AV_TX_FLOAT_FFT, inverse, 1 << nbits,
+ &scale, AV_TX_INPLACE);
+if (ret < 0) {
+av_free(s);
+return NULL;
+}
 
-return s;
+return (FFTContext *)s;
 }
 
 void av_fft_permute(FFTContext *s, FFTComplex *z)
 {
-s->fft_permute(s, z);
+/* Empty */
 }
 
 void av_fft_calc(FFTContext *s, FFTComplex *z)
 {
-s->fft_calc(s, z);
+AVTXWrapper *w = (AVTXWrapper *)s;
+w->fn(w->ctx, z, (void *)z, sizeof(AVComplexFloat));
 }
 
 av_cold void av_fft_end(FFTContext *s)
 {
 if (s) {
-ff_fft_end(s);
-av_free(s);
+AVTXWrapper *w = (AVTXWrapper *)s;
+av_tx_uninit(&w->ctx);
+av_tx_uninit(&w->ctx2);
+av_free(w);
 }
 }
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avfft: wrap lavu/tx instead of ff_rdft

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Thu Nov 10 11:26:59 2022 
+0100| [83ede01bb08239428fd65de62adc260f4233d229] | committer: Lynne

avfft: wrap lavu/tx instead of ff_rdft

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=83ede01bb08239428fd65de62adc260f4233d229
---

 libavcodec/avfft.c | 45 ++---
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 107b510828..9f5d256773 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -32,6 +32,8 @@ typedef struct AVTXWrapper {
 av_tx_fn fn2;
 
 ptrdiff_t stride;
+int len;
+int inv;
 } AVTXWrapper;
 
 /* FFT */
@@ -129,33 +131,54 @@ av_cold void av_mdct_end(FFTContext *s)
 }
 }
 
-#if CONFIG_RDFT
-
 RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans)
 {
-RDFTContext *s = av_malloc(sizeof(*s));
+int ret;
+float scale = trans == IDFT_C2R ? 0.5f : 1.0f;
+AVTXWrapper *s;
 
-if (s && ff_rdft_init(s, nbits, trans))
-av_freep(&s);
+/* The other 2 modes are unconventional, do not form an orthogonal
+ * transform, have never been useful, and so they're not implemented. */
+if (trans != IDFT_C2R && trans != DFT_R2C)
+return NULL;
 
-return s;
+s = av_malloc(sizeof(*s));
+if (!s)
+return NULL;
+
+ret = av_tx_init(&s->ctx, &s->fn, AV_TX_FLOAT_RDFT, trans == IDFT_C2R,
+ 1 << nbits, &scale, AV_TX_INPLACE);
+if (ret < 0) {
+av_free(s);
+return NULL;
+}
+
+s->stride = (trans == DFT_C2R) ? sizeof(float) : sizeof(AVComplexFloat);
+s->len = 1 << nbits;
+s->inv = trans == IDFT_C2R;
+
+return (RDFTContext *)s;
 }
 
 void av_rdft_calc(RDFTContext *s, FFTSample *data)
 {
-s->rdft_calc(s, data);
+AVTXWrapper *w = (AVTXWrapper *)s;
+if (w->inv)
+FFSWAP(float, data[1], data[w->len]);
+w->fn(w->ctx, data, (void *)data, w->stride);
+if (!w->inv)
+FFSWAP(float, data[1], data[w->len]);
 }
 
 av_cold void av_rdft_end(RDFTContext *s)
 {
 if (s) {
-ff_rdft_end(s);
-av_free(s);
+AVTXWrapper *w = (AVTXWrapper *)s;
+av_tx_uninit(&w->ctx);
+av_free(w);
 }
 }
 
-#endif /* CONFIG_RDFT */
-
 #if CONFIG_DCT
 
 DCTContext *av_dct_init(int nbits, enum DCTTransformType inverse)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavu/tx: improve rdft table generation precision slightly

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Sep  1 06:06:44 2023 
+0200| [59b39d241e3937f6c87cf68b7ca1e4cc69119347] | committer: Lynne

lavu/tx: improve rdft table generation precision slightly

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=59b39d241e3937f6c87cf68b7ca1e4cc69119347
---

 libavutil/tx_template.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index c026cb40c4..4745b56700 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -1648,7 +1648,7 @@ static av_cold int TX_NAME(ff_tx_rdft_init)(AVTXContext 
*s,
 tab = ((TXSample *)s->exp) + len4 + 8;
 
 for (int i = 0; i < len4; i++)
-*tab++ = RESCALE(cos(((float)len/4.0 - (float)i + 0)*f) * (inv ? +1.0 
: -1.0));
+*tab++ = RESCALE(cos(((len - i*4)/4.0)*f)) * (inv ? 1 : -1);
 
 return 0;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] avfft: wrap lavu/tx instead of ff_dct

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Mon Aug  7 12:07:10 2023 
+0200| [86aa34d3392282986b27dfdbf081ddfebb035604] | committer: Lynne

avfft: wrap lavu/tx instead of ff_dct

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=86aa34d3392282986b27dfdbf081ddfebb035604
---

 libavcodec/avfft.c | 62 --
 1 file changed, 51 insertions(+), 11 deletions(-)

diff --git a/libavcodec/avfft.c b/libavcodec/avfft.c
index 9f5d256773..d07c495022 100644
--- a/libavcodec/avfft.c
+++ b/libavcodec/avfft.c
@@ -34,6 +34,9 @@ typedef struct AVTXWrapper {
 ptrdiff_t stride;
 int len;
 int inv;
+
+float *tmp;
+int out_of_place;
 } AVTXWrapper;
 
 /* FFT */
@@ -179,29 +182,66 @@ av_cold void av_rdft_end(RDFTContext *s)
 }
 }
 
-#if CONFIG_DCT
-
 DCTContext *av_dct_init(int nbits, enum DCTTransformType inverse)
 {
-DCTContext *s = av_malloc(sizeof(*s));
+int ret;
+const float scale_map[] = {
+[DCT_II] = 0.5f,
+[DCT_III] = 1.0f / (1 << nbits),
+[DCT_I] = 0.5f,
+[DST_I] = 2.0f,
+};
+static const enum AVTXType type_map[] = {
+[DCT_II] = AV_TX_FLOAT_DCT,
+[DCT_III] = AV_TX_FLOAT_DCT,
+[DCT_I] = AV_TX_FLOAT_DCT_I,
+[DST_I] = AV_TX_FLOAT_DST_I,
+};
+
+AVTXWrapper *s = av_malloc(sizeof(*s));
+if (!s)
+return NULL;
 
-if (s && ff_dct_init(s, nbits, inverse))
-av_freep(&s);
+s->len = (1 << nbits);
+s->out_of_place = (inverse == DCT_I) || (inverse == DST_I);
 
-return s;
+ret = av_tx_init(&s->ctx, &s->fn, type_map[inverse],
+ (inverse == DCT_III), 1 << (nbits - (inverse == DCT_III)),
+ &scale_map[inverse], s->out_of_place ? 0 : AV_TX_INPLACE);
+if (ret < 0) {
+av_free(s);
+return NULL;
+}
+
+if (s->out_of_place) {
+s->tmp = av_malloc((1 << (nbits + 1))*sizeof(float));
+if (!s->tmp) {
+av_tx_uninit(&s->ctx);
+av_free(s);
+return NULL;
+}
+}
+
+return (DCTContext *)s;
 }
 
 void av_dct_calc(DCTContext *s, FFTSample *data)
 {
-s->dct_calc(s, data);
+AVTXWrapper *w = (AVTXWrapper *)s;
+if (w->out_of_place) {
+memcpy(w->tmp, data, w->len*sizeof(float));
+w->fn(w->ctx, (void *)data, w->tmp, sizeof(float));
+} else {
+w->fn(w->ctx, data, (void *)data, sizeof(float));
+}
 }
 
 av_cold void av_dct_end(DCTContext *s)
 {
 if (s) {
-ff_dct_end(s);
-av_free(s);
+AVTXWrapper *w = (AVTXWrapper *)s;
+av_tx_uninit(&w->ctx);
+av_free(w->tmp);
+av_free(w);
 }
 }
-
-#endif /* CONFIG_DCT */

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] ffplay: port to lavu/tx

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Feb 18 13:14:31 2023 
+0100| [4acd08be6c4f39736179a3d90fd56b508e42ff6d] | committer: Lynne

ffplay: port to lavu/tx

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4acd08be6c4f39736179a3d90fd56b508e42ff6d
---

 fftools/ffplay.c | 42 +++---
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/fftools/ffplay.c b/fftools/ffplay.c
index 5212ad053e..006da7ab57 100644
--- a/fftools/ffplay.c
+++ b/fftools/ffplay.c
@@ -47,7 +47,7 @@
 #include "libavdevice/avdevice.h"
 #include "libswscale/swscale.h"
 #include "libavutil/opt.h"
-#include "libavcodec/avfft.h"
+#include "libavutil/tx.h"
 #include "libswresample/swresample.h"
 
 #include "libavfilter/avfilter.h"
@@ -262,9 +262,11 @@ typedef struct VideoState {
 int16_t sample_array[SAMPLE_ARRAY_SIZE];
 int sample_array_index;
 int last_i_start;
-RDFTContext *rdft;
+AVTXContext *rdft;
+av_tx_fn rdft_fn;
 int rdft_bits;
-FFTSample *rdft_data;
+float *real_data;
+AVComplexFloat *rdft_data;
 int xpos;
 double last_vis_time;
 SDL_Texture *vis_texture;
@@ -1120,6 +1122,7 @@ static void video_audio_display(VideoState *s)
 fill_rectangle(s->xleft, y, s->width, 1);
 }
 } else {
+int err = 0;
 if (realloc_texture(&s->vis_texture, SDL_PIXELFORMAT_ARGB, 
s->width, s->height, SDL_BLENDMODE_NONE, 1) < 0)
 return;
 
@@ -1127,31 +1130,39 @@ static void video_audio_display(VideoState *s)
 s->xpos = 0;
 nb_display_channels= FFMIN(nb_display_channels, 2);
 if (rdft_bits != s->rdft_bits) {
-av_rdft_end(s->rdft);
-av_free(s->rdft_data);
-s->rdft = av_rdft_init(rdft_bits, DFT_R2C);
+const float rdft_scale = 1.0;
+av_tx_uninit(&s->rdft);
+av_freep(&s->real_data);
+av_freep(&s->rdft_data);
 s->rdft_bits = rdft_bits;
-s->rdft_data = av_malloc_array(nb_freq, 4 *sizeof(*s->rdft_data));
+s->real_data = av_malloc_array(nb_freq, 4 *sizeof(*s->real_data));
+s->rdft_data = av_malloc_array(nb_freq + 1, 2 
*sizeof(*s->rdft_data));
+err = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT,
+ 0, 1 << rdft_bits, &rdft_scale, 0);
 }
-if (!s->rdft || !s->rdft_data){
+if (err < 0 || !s->rdft_data) {
 av_log(NULL, AV_LOG_ERROR, "Failed to allocate buffers for RDFT, 
switching to waves display\n");
 s->show_mode = SHOW_MODE_WAVES;
 } else {
-FFTSample *data[2];
+float *data_in[2];
+AVComplexFloat *data[2];
 SDL_Rect rect = {.x = s->xpos, .y = 0, .w = 1, .h = s->height};
 uint32_t *pixels;
 int pitch;
 for (ch = 0; ch < nb_display_channels; ch++) {
-data[ch] = s->rdft_data + 2 * nb_freq * ch;
+data_in[ch] = s->real_data + 2 * nb_freq * ch;
+data[ch] = s->rdft_data + nb_freq * ch;
 i = i_start + ch;
 for (x = 0; x < 2 * nb_freq; x++) {
 double w = (x-nb_freq) * (1.0 / nb_freq);
-data[ch][x] = s->sample_array[i] * (1.0 - w * w);
+data_in[ch][x] = s->sample_array[i] * (1.0 - w * w);
 i += channels;
 if (i >= SAMPLE_ARRAY_SIZE)
 i -= SAMPLE_ARRAY_SIZE;
 }
-av_rdft_calc(s->rdft, data[ch]);
+s->rdft_fn(s->rdft, data[ch], data_in[ch], sizeof(float));
+data[ch][0].im = data[ch][nb_freq].re;
+data[ch][nb_freq].re = 0;
 }
 /* Least efficient way to do this, we should of course
  * directly access it but it is more than fast enough. */
@@ -1160,8 +1171,8 @@ static void video_audio_display(VideoState *s)
 pixels += pitch * s->height;
 for (y = 0; y < s->height; y++) {
 double w = 1 / sqrt(nb_freq);
-int a = sqrt(w * sqrt(data[0][2 * y + 0] * data[0][2 * y + 
0] + data[0][2 * y + 1] * data[0][2 * y + 1]));
-int b = (nb_display_channels == 2 ) ? sqrt(w * 
hypot(data[1][2 * y + 0], data[1][2 * y + 1]))
+int a = sqrt(w * sqrt(data[0][y].re * data[0][y].re + 
data[0][y].im * data[0][y].im));
+int b = (nb_display_channels == 2 ) ? sqrt(w * 
hypot(data[1][y].re, data[1][y].im))
 : a;
 a = FFMIN(a, 255);
 b = FFMIN(b, 255);
@@ -1197,7 +1208,8 @@ static vo

[FFmpeg-cvslog] lavu/tx: add real to real and real to imaginary RDFT transforms

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Thu Aug  3 18:21:23 2023 
+0200| [11e22730e1eb7ec6b5953c16b3b4df58be14e2de] | committer: Lynne

lavu/tx: add real to real and real to imaginary RDFT transforms

These are in-place transforms, required for DCT-I and DST-I.

Templated as the mod2 variant requires minor modifications, and is
required specifically for DCT-I/DST-I.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=11e22730e1eb7ec6b5953c16b3b4df58be14e2de
---

 doc/APIchanges  |   3 +
 libavutil/tx.c  |  18 -
 libavutil/tx.h  |  10 +++
 libavutil/tx_template.c | 175 ++--
 libavutil/version.h |   2 +-
 5 files changed, 167 insertions(+), 41 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index ad1efe708d..db3242b667 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -2,6 +2,9 @@ The last version increases of all libraries were on 2023-02-09
 
 API changes, most recent first:
 
+2023-07-xx - xx - lavu 58.18.100 - tx.h
+  Add AV_TX_REAL_TO_REAL and AV_TX_REAL_TO_IMAGINARY
+
 2023-08-18 - xx - lavu 58.17.100 - channel_layout.h
   All AV_CHANNEL_LAYOUT_* macros are now compatible with C++ 17 and older.
 
diff --git a/libavutil/tx.c b/libavutil/tx.c
index e25abf998f..e9826e6107 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -437,7 +437,9 @@ int ff_tx_decompose_length(int dst[TX_MAX_DECOMPOSITIONS], 
enum AVTXType type,
 
 /* Check direction for non-orthogonal codelets */
 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
-((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && 
!inv))
+((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && 
!inv) ||
+((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && 
inv) ||
+((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) 
&& inv))
 continue;
 
 /* Check if the CPU supports the required ISA */
@@ -560,6 +562,10 @@ static void print_flags(AVBPrint *bp, uint64_t f)
 av_bprintf(bp, "%spreshuf", prev > 1 ? sep : "");
 if ((f & AV_TX_FULL_IMDCT) && ++prev)
 av_bprintf(bp, "%simdct_full", prev > 1 ? sep : "");
+if ((f & AV_TX_REAL_TO_REAL) && ++prev)
+av_bprintf(bp, "%sreal_to_real", prev > 1 ? sep : "");
+if ((f & AV_TX_REAL_TO_IMAGINARY) && ++prev)
+av_bprintf(bp, "%sreal_to_imaginary", prev > 1 ? sep : "");
 if ((f & FF_TX_ASM_CALL) && ++prev)
 av_bprintf(bp, "%sasm_call", prev > 1 ? sep : "");
 av_bprintf(bp, "]");
@@ -717,7 +723,11 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType 
type,
 uint64_t req_flags = flags;
 
 /* Flags the codelet may require to be present */
-uint64_t inv_req_mask = AV_TX_FULL_IMDCT | FF_TX_PRESHUFFLE | 
FF_TX_ASM_CALL;
+uint64_t inv_req_mask = AV_TX_FULL_IMDCT |
+AV_TX_REAL_TO_REAL |
+AV_TX_REAL_TO_IMAGINARY |
+FF_TX_PRESHUFFLE |
+FF_TX_ASM_CALL;
 
 /* Unaligned codelets are compatible with the aligned flag */
 if (req_flags & FF_TX_ALIGNED)
@@ -742,7 +752,9 @@ av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType 
type,
 
 /* Check direction for non-orthogonal codelets */
 if (((cd->flags & FF_TX_FORWARD_ONLY) && inv) ||
-((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && 
!inv))
+((cd->flags & (FF_TX_INVERSE_ONLY | AV_TX_FULL_IMDCT)) && 
!inv) ||
+((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_REAL)) && 
inv) ||
+((cd->flags & (FF_TX_FORWARD_ONLY | AV_TX_REAL_TO_IMAGINARY)) 
&& inv))
 continue;
 
 /* Check if the requested flags match from both sides */
diff --git a/libavutil/tx.h b/libavutil/tx.h
index 064edbc097..d178e8ee9d 100644
--- a/libavutil/tx.h
+++ b/libavutil/tx.h
@@ -149,6 +149,16 @@ enum AVTXFlags {
  * Ignored for all transforms but inverse MDCTs.
  */
 AV_TX_FULL_IMDCT = 1ULL << 2,
+
+/**
+ * Perform a real to half-complex RDFT.
+ * Only the real, or imaginary coefficients will
+ * be output, depending on the flag used. Only available for forward RDFTs.
+ * Output array must have enough space to hold N complex values
+ * (regular size for a real to complex transform).
+ */
+AV_TX_REAL_TO_REAL  = 1ULL << 3,
+AV_TX_REAL_TO_IMAGINARY = 1ULL << 4,
 };
 
 /**
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index c4ec9502e0..c56dcf0826 100

[FFmpeg-cvslog] wmavoice: convert DCT-I/DST-I to lavu/tx

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Aug  4 21:16:30 2023 
+0200| [a810126501e1ef0992d765720ff0d2629c5d1616] | committer: Lynne

wmavoice: convert DCT-I/DST-I to lavu/tx

This is the very last user of any lavc transform code.

This also *corrects* wmavoice decoding, as the previous DCT/DST
transforms were incorrect, bringing it closer to Microsoft's
own wmavoice decoder.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a810126501e1ef0992d765720ff0d2629c5d1616
---

 libavcodec/wmavoice.c | 29 +
 tests/fate/wma.mak|  9 ++---
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 5ae92e2dbc..915315cb8a 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -42,8 +42,6 @@
 #include "acelp_vectors.h"
 #include "acelp_filters.h"
 #include "lsp.h"
-#include "dct.h"
-#include "rdft.h"
 #include "sinewin.h"
 
 #define MAX_BLOCKS   8   ///< maximum number of blocks per frame
@@ -266,8 +264,8 @@ typedef struct WMAVoiceContext {
  */
 AVTXContext *rdft, *irdft;///< contexts for FFT-calculation in the
 av_tx_fn rdft_fn, irdft_fn;   ///< postfilter (for denoise filter)
-DCTContext dct, dst;  ///< contexts for phase shift (in Hilbert
-  ///< transform, part of postfilter)
+AVTXContext *dct, *dst;   ///< contexts for phase shift (in Hilbert
+av_tx_fn dct_fn, dst_fn;  ///< transform, part of postfilter)
 float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]
   ///< range
 float postfilter_agc; ///< gain control memory, used in
@@ -391,10 +389,6 @@ static av_cold int wmavoice_decode_init(AVCodecContext 
*ctx)
 if (s->do_apf) {
 float scale = 1.0f;
 
-if ((ret = ff_dct_init (&s->dct,   6,DCT_I)) < 0 ||
-(ret = ff_dct_init (&s->dst,   6,DST_I)) < 0)
-return ret;
-
 ret = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT, 0, 1 << 7, 
&scale, 0);
 if (ret < 0)
 return ret;
@@ -403,6 +397,16 @@ static av_cold int wmavoice_decode_init(AVCodecContext 
*ctx)
 if (ret < 0)
 return ret;
 
+scale = 1.0 / (1 << 6);
+ret = av_tx_init(&s->dct, &s->dct_fn, AV_TX_FLOAT_DCT_I, 0, 1 << 6, 
&scale, 0);
+if (ret < 0)
+return ret;
+
+scale = 1.0 / (1 << 6);
+ret = av_tx_init(&s->dst, &s->dst_fn, AV_TX_FLOAT_DST_I, 0, 1 << 6, 
&scale, 0);
+if (ret < 0)
+return ret;
+
 ff_sine_window_init(s->cos, 256);
 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
 for (n = 0; n < 255; n++) {
@@ -612,6 +616,7 @@ static void calc_input_response(WMAVoiceContext *s, float 
*lpcs_src,
 float irange, angle_mul, gain_mul, range, sq;
 LOCAL_ALIGNED_32(float, coeffs, [0x82]);
 LOCAL_ALIGNED_32(float, lpcs, [0x82]);
+LOCAL_ALIGNED_32(float, lpcs_dct, [0x82]);
 int n, idx;
 
 memcpy(coeffs, coeffs_dst, 0x82*sizeof(float));
@@ -662,8 +667,8 @@ static void calc_input_response(WMAVoiceContext *s, float 
*lpcs_src,
  * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
  * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
  * "moment" of the LPCs in this filter. */
-s->dct.dct_calc(&s->dct, lpcs);
-s->dst.dct_calc(&s->dst, lpcs);
+s->dct_fn(s->dct, lpcs_dct, lpcs, sizeof(float));
+s->dst_fn(s->dst, lpcs, lpcs_dct, sizeof(float));
 
 /* Split out the coefficient indexes into phase/magnitude pairs */
 idx = 255 + av_clip(lpcs[64],   -255, 255);
@@ -2003,8 +2008,8 @@ static av_cold int wmavoice_decode_end(AVCodecContext 
*ctx)
 if (s->do_apf) {
 av_tx_uninit(&s->rdft);
 av_tx_uninit(&s->irdft);
-ff_dct_end(&s->dct);
-ff_dct_end(&s->dst);
+av_tx_uninit(&s->dct);
+av_tx_uninit(&s->dst);
 }
 
 return 0;
diff --git a/tests/fate/wma.mak b/tests/fate/wma.mak
index c13874ebfc..308dced9d6 100644
--- a/tests/fate/wma.mak
+++ b/tests/fate/wma.mak
@@ -20,17 +20,20 @@ fate-wmapro: $(FATE_WMAPRO-yes)
 
 FATE_WMAVOICE-$(call DEMDEC, ASF, WMAVOICE) += fate-wmavoice-7k
 fate-wmavoice-7k: CMD = pcm -i $(TARGET_SAMPLES)/wmavoice/streaming_CBR-7K.wma
-fate-wmavoice-7k: REF = $(SAMPLES)/wmavoice/streaming_CBR-7K.pcm
+fate-wmavoice-7k: REF = $(SAMPLES)/wmavoice/streaming_CBR-7K_ref.pcm
+fate-wmavoice-7k: CMP_TARGET = 1368.61
 fate-wmavoice-7k: FUZZ = 3
 
 FATE_WMAVOICE-$(call DEMDEC, ASF, WMAVOICE) += fate-wmavoice-11k
 fate-wmavoice-11k: CMD = pcm -i 
$(TARGET_SAMPLES)/wmavoice/streaming_CBR-11K.wma
-fate-wmavoice-11k: REF = $(SAMPLES)/wmavoice/streaming_CBR-11K.pcm
+fate-wmavoice-11k: R

[FFmpeg-cvslog] lavu/tx: add DCT-I and DST-I transforms

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Thu Aug  3 18:23:02 2023 
+0200| [ef8fd7bc3c479ec00b3e1f490d44f9c93894d210] | committer: Lynne

lavu/tx: add DCT-I and DST-I transforms

These are true, actual DCT-I and DST-I transforms, unlike the
libavcodec versions, which are plainly not.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ef8fd7bc3c479ec00b3e1f490d44f9c93894d210
---

 libavutil/tx.h  |  24 +++
 libavutil/tx_template.c | 103 
 2 files changed, 127 insertions(+)

diff --git a/libavutil/tx.h b/libavutil/tx.h
index d178e8ee9d..4696988cae 100644
--- a/libavutil/tx.h
+++ b/libavutil/tx.h
@@ -105,6 +105,30 @@ enum AVTXType {
 AV_TX_DOUBLE_DCT = 10,
 AV_TX_INT32_DCT  = 11,
 
+/**
+ * Discrete Cosine Transform I
+ *
+ * The forward transform is a DCT-I.
+ * The inverse transform is a DCT-I multiplied by 2/(N + 1).
+ *
+ * The input array is always overwritten.
+ */
+AV_TX_FLOAT_DCT_I  = 12,
+AV_TX_DOUBLE_DCT_I = 13,
+AV_TX_INT32_DCT_I  = 14,
+
+/**
+ * Discrete Sine Transform I
+ *
+ * The forward transform is a DST-I.
+ * The inverse transform is a DST-I multiplied by 2/(N + 1).
+ *
+ * The input array is always overwritten.
+ */
+AV_TX_FLOAT_DST_I  = 15,
+AV_TX_DOUBLE_DST_I = 16,
+AV_TX_INT32_DST_I  = 17,
+
 /* Not part of the API, do not use */
 AV_TX_NB,
 };
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index c56dcf0826..c026cb40c4 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -2004,6 +2004,107 @@ static const FFTXCodelet TX_NAME(ff_tx_dctIII_def) = {
 .prio   = FF_TX_PRIO_BASE,
 };
 
+static av_cold int TX_NAME(ff_tx_dcstI_init)(AVTXContext *s,
+ const FFTXCodelet *cd,
+ uint64_t flags,
+ FFTXCodeletOptions *opts,
+ int len, int inv,
+ const void *scale)
+{
+int ret;
+SCALE_TYPE rsc = *((SCALE_TYPE *)scale);
+
+if (inv) {
+len *= 2;
+s->len *= 2;
+rsc *= 0.5;
+}
+
+/* We want a half-complex RDFT */
+flags |= cd->type == TX_TYPE(DCT_I) ? AV_TX_REAL_TO_REAL :
+  AV_TX_REAL_TO_IMAGINARY;
+
+if ((ret = ff_tx_init_subtx(s, TX_TYPE(RDFT), flags, NULL,
+(len - 1 + 2*(cd->type == TX_TYPE(DST_I)))*2,
+0, &rsc)))
+return ret;
+
+s->tmp = av_mallocz((len + 1)*2*sizeof(TXSample));
+if (!s->tmp)
+return AVERROR(ENOMEM);
+
+return 0;
+}
+
+static void TX_NAME(ff_tx_dctI)(AVTXContext *s, void *_dst,
+void *_src, ptrdiff_t stride)
+{
+TXSample *dst = _dst;
+TXSample *src = _src;
+const int len = s->len - 1;
+TXSample *tmp = (TXSample *)s->tmp;
+
+stride /= sizeof(TXSample);
+
+for (int i = 0; i < len; i++)
+tmp[i] = tmp[2*len - i] = src[i * stride];
+
+tmp[len] = src[len * stride]; /* Middle */
+
+s->fn[0](&s->sub[0], dst, tmp, sizeof(TXSample));
+}
+
+static void TX_NAME(ff_tx_dstI)(AVTXContext *s, void *_dst,
+void *_src, ptrdiff_t stride)
+{
+TXSample *dst = _dst;
+TXSample *src = _src;
+const int len = s->len + 1;
+TXSample *tmp = (void *)s->tmp;
+
+stride /= sizeof(TXSample);
+
+tmp[0] = 0;
+
+for (int i = 1; i < len; i++) {
+TXSample a = src[(i - 1) * stride];
+tmp[i] = -a;
+tmp[2*len - i] = a;
+}
+
+tmp[len] = 0; /* i == n, Nyquist */
+
+s->fn[0](&s->sub[0], dst, tmp, sizeof(float));
+}
+
+static const FFTXCodelet TX_NAME(ff_tx_dctI_def) = {
+.name   = TX_NAME_STR("dctI"),
+.function   = TX_NAME(ff_tx_dctI),
+.type   = TX_TYPE(DCT_I),
+.flags  = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
+.factors= { 2, TX_FACTOR_ANY },
+.nb_factors = 2,
+.min_len= 2,
+.max_len= TX_LEN_UNLIMITED,
+.init   = TX_NAME(ff_tx_dcstI_init),
+.cpu_flags  = FF_TX_CPU_FLAGS_ALL,
+.prio   = FF_TX_PRIO_BASE,
+};
+
+static const FFTXCodelet TX_NAME(ff_tx_dstI_def) = {
+.name   = TX_NAME_STR("dstI"),
+.function   = TX_NAME(ff_tx_dstI),
+.type   = TX_TYPE(DST_I),
+.flags  = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE,
+.factors= { 2, TX_FACTOR_ANY },
+.nb_factors = 2,
+.min_len= 2,
+.max_len= TX_LEN_UNLIMITED,
+.init   = TX_NAME(ff_tx_dcstI_init),
+.cpu_flags  = FF_TX_CPU_FLAGS_ALL,
+.prio   = FF_TX_PRIO_BASE,
+};
+
 int TX_TAB(ff_tx_mdct_gen_exp)(AVTXContext *s, int *pre_tab)
 {
 int off = 0;
@@ -2101,6 +2202,8 @@ con

[FFmpeg-cvslog] wmavoice: convert RDFT to lavu/tx

2023-09-01 Thread Lynne
ffmpeg | branch: master | Lynne  | Fri Aug  4 20:20:10 2023 
+0200| [d895d3c8c79e301f9d6f3aab0cc754ac2b7d78fb] | committer: Lynne

wmavoice: convert RDFT to lavu/tx

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d895d3c8c79e301f9d6f3aab0cc754ac2b7d78fb
---

 libavcodec/wmavoice.c | 75 ++-
 1 file changed, 45 insertions(+), 30 deletions(-)

diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 44fda0e2d6..5ae92e2dbc 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -31,6 +31,7 @@
 #include "libavutil/float_dsp.h"
 #include "libavutil/mem_internal.h"
 #include "libavutil/thread.h"
+#include "libavutil/tx.h"
 #include "avcodec.h"
 #include "codec_internal.h"
 #include "decode.h"
@@ -263,8 +264,8 @@ typedef struct WMAVoiceContext {
  * smoothing and so on, and context variables for FFT/iFFT.
  * @{
  */
-RDFTContext rdft, irdft;  ///< contexts for FFT-calculation in the
-  ///< postfilter (for denoise filter)
+AVTXContext *rdft, *irdft;///< contexts for FFT-calculation in the
+av_tx_fn rdft_fn, irdft_fn;   ///< postfilter (for denoise filter)
 DCTContext dct, dst;  ///< contexts for phase shift (in Hilbert
   ///< transform, part of postfilter)
 float sin[511], cos[511]; ///< 8-bit cosine/sine windows over [-pi,pi]
@@ -277,9 +278,9 @@ typedef struct WMAVoiceContext {
   ///< by postfilter
 float denoise_filter_cache[MAX_FRAMESIZE];
 int   denoise_filter_cache_size; ///< samples in #denoise_filter_cache
-DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x80];
+DECLARE_ALIGNED(32, float, tilted_lpcs_pf)[0x82];
   ///< aligned buffer for LPC tilting
-DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x80];
+DECLARE_ALIGNED(32, float, denoise_coeffs_pf)[0x82];
   ///< aligned buffer for denoise coefficients
 DECLARE_ALIGNED(32, float, synth_filter_out_buf)[0x80 + MAX_LSPS_ALIGN16];
   ///< aligned buffer for postfilter speech
@@ -388,12 +389,20 @@ static av_cold int wmavoice_decode_init(AVCodecContext 
*ctx)
 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
 s->do_apf=flags & 0x1;
 if (s->do_apf) {
-if ((ret = ff_rdft_init(&s->rdft,  7,  DFT_R2C)) < 0 ||
-(ret = ff_rdft_init(&s->irdft, 7, IDFT_C2R)) < 0 ||
-(ret = ff_dct_init (&s->dct,   6,DCT_I)) < 0 ||
+float scale = 1.0f;
+
+if ((ret = ff_dct_init (&s->dct,   6,DCT_I)) < 0 ||
 (ret = ff_dct_init (&s->dst,   6,DST_I)) < 0)
 return ret;
 
+ret = av_tx_init(&s->rdft, &s->rdft_fn, AV_TX_FLOAT_RDFT, 0, 1 << 7, 
&scale, 0);
+if (ret < 0)
+return ret;
+
+ret = av_tx_init(&s->irdft, &s->irdft_fn, AV_TX_FLOAT_RDFT, 1, 1 << 7, 
&scale, 0);
+if (ret < 0)
+return ret;
+
 ff_sine_window_init(s->cos, 256);
 memcpy(&s->sin[255], s->cos, 256 * sizeof(s->cos[0]));
 for (n = 0; n < 255; n++) {
@@ -596,20 +605,24 @@ static float tilt_factor(const float *lpcs, int n_lpcs)
 /**
  * Derive denoise filter coefficients (in real domain) from the LPCs.
  */
-static void calc_input_response(WMAVoiceContext *s, float *lpcs,
-int fcb_type, float *coeffs, int remainder)
+static void calc_input_response(WMAVoiceContext *s, float *lpcs_src,
+int fcb_type, float *coeffs_dst, int remainder)
 {
 float last_coeff, min = 15.0, max = -15.0;
 float irange, angle_mul, gain_mul, range, sq;
+LOCAL_ALIGNED_32(float, coeffs, [0x82]);
+LOCAL_ALIGNED_32(float, lpcs, [0x82]);
 int n, idx;
 
+memcpy(coeffs, coeffs_dst, 0x82*sizeof(float));
+
 /* Create frequency power spectrum of speech input (i.e. RDFT of LPCs) */
-s->rdft.rdft_calc(&s->rdft, lpcs);
+s->rdft_fn(s->rdft, lpcs, lpcs_src, sizeof(float));
 #define log_range(var, assign) do { \
 float tmp = log10f(assign);  var = tmp; \
 max   = FFMAX(max, tmp); min = FFMIN(min, tmp); \
 } while (0)
-log_range(last_coeff,  lpcs[1] * lpcs[1]);
+log_range(last_coeff,  lpcs[64] * lpcs[64]);
 for (n = 1; n < 64; n++)
 log_range(lpcs[n], lpcs[n * 2] * lpcs[n * 2] +
lpcs[n * 2 + 1] * lpcs[n * 2 + 1]);
@@ -668,25 +681,25 @@ static void calc_input_response(WMAVoiceContext *s, float 
*lpcs,
 coeffs[n * 2 + 1] = coeffs[n] * s->sin[idx];
 coeffs[n * 2] = coeffs[n] * s->cos[idx];
 }
-coeffs[1] = last_coeff;
+coeffs[64] = last_coeff;
 

[FFmpeg-cvslog] vulkan: do not leak bound_buffer_indices

2023-08-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Aug 26 21:53:03 2023 
+| [d0a64f9a81530436e3b3e86767f51d481d3ce4a8] | committer: Lynne

vulkan: do not leak bound_buffer_indices

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d0a64f9a81530436e3b3e86767f51d481d3ce4a8
---

 libavutil/vulkan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 00a7b5b801..dec8ccad64 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1884,6 +1884,7 @@ void ff_vk_pipeline_free(FFVulkanContext *s, 
FFVulkanPipeline *pl)
 
 av_freep(&pl->desc_set);
 av_freep(&pl->desc_bind);
+av_freep(&pl->bound_buffer_indices);
 av_freep(&pl->push_consts);
 pl->push_consts_num = 0;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavfi/scale_vulkan: fix memory leaks

2023-08-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Aug 26 21:52:13 2023 
+| [d0ab2e2f98a220f87fd5d581c7e0e13cfe366f4b] | committer: Lynne

lavfi/scale_vulkan: fix memory leaks

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d0ab2e2f98a220f87fd5d581c7e0e13cfe366f4b
---

 libavfilter/vf_scale_vulkan.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavfilter/vf_scale_vulkan.c b/libavfilter/vf_scale_vulkan.c
index 6699bab934..14f471b819 100644
--- a/libavfilter/vf_scale_vulkan.c
+++ b/libavfilter/vf_scale_vulkan.c
@@ -256,8 +256,6 @@ static av_cold int init_filter(AVFilterContext *ctx, 
AVFrame *in)
 
 s->initialized = 1;
 
-return 0;
-
 fail:
 if (spv_opaque)
 spv->free_shader(spv, &spv_opaque);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavfi/transpose_vulkan: fix memory leaks

2023-08-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Aug 26 21:52:22 2023 
+| [3ef1e50c922afa69404c07486a2251d5795f3c85] | committer: Lynne

lavfi/transpose_vulkan: fix memory leaks

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3ef1e50c922afa69404c07486a2251d5795f3c85
---

 libavfilter/vf_transpose_vulkan.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavfilter/vf_transpose_vulkan.c 
b/libavfilter/vf_transpose_vulkan.c
index 3abe93be0b..c9a520ce5f 100644
--- a/libavfilter/vf_transpose_vulkan.c
+++ b/libavfilter/vf_transpose_vulkan.c
@@ -121,8 +121,6 @@ static av_cold int init_filter(AVFilterContext *ctx, 
AVFrame *in)
 
 s->initialized = 1;
 
-return 0;
-
 fail:
 if (spv_opaque)
 spv->free_shader(spv, &spv_opaque);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavfi/testsrc_vulkan: fix memory leaks

2023-08-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Aug 26 21:52:34 2023 
+| [5f8feddd6a09214816e8fd0e674df6fc5e33732d] | committer: Lynne

lavfi/testsrc_vulkan: fix memory leaks

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5f8feddd6a09214816e8fd0e674df6fc5e33732d
---

 libavfilter/vsrc_testsrc_vulkan.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavfilter/vsrc_testsrc_vulkan.c 
b/libavfilter/vsrc_testsrc_vulkan.c
index 8485fa498d..8761c21dfd 100644
--- a/libavfilter/vsrc_testsrc_vulkan.c
+++ b/libavfilter/vsrc_testsrc_vulkan.c
@@ -189,8 +189,6 @@ static av_cold int init_filter(AVFilterContext *ctx, enum 
TestSrcVulkanMode mode
 
 s->initialized = 1;
 
-return 0;
-
 fail:
 if (spv_opaque)
 spv->free_shader(spv, _opaque);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavfi/nlmeans_vulkan: fix memory leaks

2023-08-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Aug 26 21:52:04 2023 
+| [0f2ae1ba6939a1dede0cf02e43daf50218416093] | committer: Lynne

lavfi/nlmeans_vulkan: fix memory leaks

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0f2ae1ba6939a1dede0cf02e43daf50218416093
---

 libavfilter/vf_nlmeans_vulkan.c | 9 +++--
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/libavfilter/vf_nlmeans_vulkan.c b/libavfilter/vf_nlmeans_vulkan.c
index 7da31b73f5..99f4f867e7 100644
--- a/libavfilter/vf_nlmeans_vulkan.c
+++ b/libavfilter/vf_nlmeans_vulkan.c
@@ -424,8 +424,6 @@ static av_cold int init_weights_pipeline(FFVulkanContext 
*vkctx, FFVkExecPool *e
 RET(ff_vk_init_compute_pipeline(vkctx, pl, shd));
 RET(ff_vk_exec_pipeline_register(vkctx, exec, pl));
 
-return 0;
-
 fail:
 if (spv_opaque)
 spv->free_shader(spv, _opaque);
@@ -570,8 +568,6 @@ static av_cold int init_denoise_pipeline(FFVulkanContext 
*vkctx, FFVkExecPool *e
 RET(ff_vk_init_compute_pipeline(vkctx, pl, shd));
 RET(ff_vk_exec_pipeline_register(vkctx, exec, pl));
 
-return 0;
-
 fail:
 if (spv_opaque)
 spv->free_shader(spv, _opaque);
@@ -671,8 +667,6 @@ static av_cold int init_filter(AVFilterContext *ctx)
 
 s->initialized = 1;
 
-return 0;
-
 fail:
 if (spv)
 spv->uninit();
@@ -1065,6 +1059,9 @@ static void nlmeans_vulkan_uninit(AVFilterContext *avctx)
 
 ff_vk_uninit(>vkctx);
 
+av_freep(>xoffsets);
+av_freep(>yoffsets);
+
 s->initialized = 0;
 }
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavfi/bwdif_vulkan: fix memory leaks

2023-08-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Aug 26 21:51:36 2023 
+| [a4673c9dff2432d5cc903d345cd62c83d7436a94] | committer: Lynne

lavfi/bwdif_vulkan: fix memory leaks

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a4673c9dff2432d5cc903d345cd62c83d7436a94
---

 libavfilter/vf_bwdif_vulkan.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavfilter/vf_bwdif_vulkan.c b/libavfilter/vf_bwdif_vulkan.c
index db916b22cd..f1623e6ef7 100644
--- a/libavfilter/vf_bwdif_vulkan.c
+++ b/libavfilter/vf_bwdif_vulkan.c
@@ -252,8 +252,6 @@ static av_cold int init_filter(AVFilterContext *ctx)
 
 s->initialized = 1;
 
-return 0;
-
 fail:
 if (spv_opaque)
 spv->free_shader(spv, _opaque);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavfi/chromaber_vulkan: fix memory leaks

2023-08-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Aug 26 21:51:55 2023 
+| [9944e96c61739514ffa99f0e9229b342b3d5ff45] | committer: Lynne

lavfi/chromaber_vulkan: fix memory leaks

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9944e96c61739514ffa99f0e9229b342b3d5ff45
---

 libavfilter/vf_chromaber_vulkan.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavfilter/vf_chromaber_vulkan.c 
b/libavfilter/vf_chromaber_vulkan.c
index 8b196f149e..0b96a7400f 100644
--- a/libavfilter/vf_chromaber_vulkan.c
+++ b/libavfilter/vf_chromaber_vulkan.c
@@ -156,8 +156,6 @@ static av_cold int init_filter(AVFilterContext *ctx, 
AVFrame *in)
 
 s->initialized = 1;
 
-return 0;
-
 fail:
 if (spv_opaque)
 spv->free_shader(spv, _opaque);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan: do not leak cooperative matrix properties

2023-08-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Aug 26 23:20:47 2023 
+0200| [747871a42c0021fd82f0ca806b8419a29f627d7f] | committer: Lynne

vulkan: do not leak cooperative matrix properties

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=747871a42c0021fd82f0ca806b8419a29f627d7f
---

 libavutil/vulkan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 684b92de57..8e36ba508c 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -1893,6 +1893,7 @@ void ff_vk_uninit(FFVulkanContext *s)
 av_freep(>query_props);
 av_freep(>qf_props);
 av_freep(>video_props);
+av_freep(>coop_mat_props);
 
 av_buffer_unref(>frames_ref);
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavfi/avgblur_vulkan: fix memory leaks

2023-08-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Aug 26 21:51:19 2023 
+| [b6cc53092aeac94938b5e28fe4435ceee365bbcb] | committer: Lynne

lavfi/avgblur_vulkan: fix memory leaks

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b6cc53092aeac94938b5e28fe4435ceee365bbcb
---

 libavfilter/vf_avgblur_vulkan.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavfilter/vf_avgblur_vulkan.c b/libavfilter/vf_avgblur_vulkan.c
index ae32cd7324..6bc1b616a6 100644
--- a/libavfilter/vf_avgblur_vulkan.c
+++ b/libavfilter/vf_avgblur_vulkan.c
@@ -153,8 +153,6 @@ static av_cold int init_filter(AVFilterContext *ctx, 
AVFrame *in)
 s->opts.filter_norm[2] = s->opts.filter_norm[0];
 s->opts.filter_norm[3] = s->opts.filter_norm[0];
 
-return 0;
-
 fail:
 if (spv_opaque)
 spv->free_shader(spv, _opaque);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan: check for extension rather than function pointer

2023-08-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Mon Aug 28 22:20:45 2023 
+0200| [f6cf3a40e479b5f1e98b11cf6c816baf34cb3bac] | committer: Lynne

vulkan: check for extension rather than function pointer

The loader ensures only that functions with tagged supported extensions
exist, rather than ensuring only those with supported extensions are
loaded.
As the init function uses Vulkan functions, whose loading requires them
to have the extension flags set, the extension flags are guaranteed
to also exist at this point.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f6cf3a40e479b5f1e98b11cf6c816baf34cb3bac
---

 libavutil/vulkan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 8e36ba508c..00a7b5b801 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -165,7 +165,7 @@ int ff_vk_load_props(FFVulkanContext *s)
 
 vk->GetPhysicalDeviceQueueFamilyProperties2(s->hwctx->phys_dev, 
>tot_nb_qfs, s->qf_props);
 
-if (vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR) {
+if (s->extensions & FF_VK_EXT_COOP_MATRIX) {
 vk->GetPhysicalDeviceCooperativeMatrixPropertiesKHR(s->hwctx->phys_dev,
 
>coop_mat_props_nb, NULL);
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] vulkan: enable VK_KHR_cooperative_matrix

2023-08-26 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Aug 12 10:46:45 2023 
+| [358919506d611493508c8af203c4dd15706c570f] | committer: Lynne

vulkan: enable VK_KHR_cooperative_matrix

It's of interest to API users, and of interest to us,
as a DCT/DST can be implemented via matrix multiplies.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=358919506d611493508c8af203c4dd15706c570f
---

 configure|  4 ++--
 libavutil/hwcontext_vulkan.c | 14 --
 libavutil/vulkan.c   | 25 -
 libavutil/vulkan.h   |  4 
 libavutil/vulkan_functions.h |  2 ++
 libavutil/vulkan_loader.h|  1 +
 6 files changed, 45 insertions(+), 5 deletions(-)

diff --git a/configure b/configure
index c1e592729a..bd7f7697c8 100755
--- a/configure
+++ b/configure
@@ -7149,8 +7149,8 @@ enabled crystalhd && check_lib crystalhd "stdint.h 
libcrystalhd/libcrystalhd_if.
  "in maintaining it."
 
 if enabled vulkan; then
-check_pkg_config_header_only vulkan "vulkan >= 1.3.238" "vulkan/vulkan.h" 
"defined VK_VERSION_1_3" ||
-check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) 
|| (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 238)"
+check_pkg_config_header_only vulkan "vulkan >= 1.3.255" "vulkan/vulkan.h" 
"defined VK_VERSION_1_3" ||
+check_cpp_condition vulkan "vulkan/vulkan.h" "defined(VK_VERSION_1_4) 
|| (defined(VK_VERSION_1_3) && VK_HEADER_VERSION >= 255)"
 fi
 
 if enabled x86; then
diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 54faf16a69..711a32a0ac 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -99,6 +99,7 @@ typedef struct VulkanDevicePriv {
 VkPhysicalDeviceVulkan13Features device_features_1_3;
 VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features;
 VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features;
+VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_matrix_features;
 
 /* Queues */
 pthread_mutex_t **qf_mutex;
@@ -405,6 +406,7 @@ static const VulkanOptExtension optional_device_exts[] = {
 { VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
FF_VK_EXT_DESCRIPTOR_BUFFER, },
 { VK_EXT_PHYSICAL_DEVICE_DRM_EXTENSION_NAME,  
FF_VK_EXT_DEVICE_DRM },
 { VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME,  
FF_VK_EXT_ATOMIC_FLOAT   },
+{ VK_KHR_COOPERATIVE_MATRIX_EXTENSION_NAME,   
FF_VK_EXT_COOP_MATRIX},
 
 /* Imports/exports */
 { VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME,   
FF_VK_EXT_EXTERNAL_FD_MEMORY },
@@ -1202,9 +1204,13 @@ static int 
vulkan_device_create_internal(AVHWDeviceContext *ctx,
 VkPhysicalDeviceTimelineSemaphoreFeatures timeline_features = {
 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
 };
+VkPhysicalDeviceCooperativeMatrixFeaturesKHR coop_matrix_features = {
+.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR,
+.pNext = _features,
+};
 VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_features = {
 .sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT,
-.pNext = _features,
+.pNext = _matrix_features,
 };
 VkPhysicalDeviceDescriptorBufferFeaturesEXT desc_buf_features = {
 .sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT,
@@ -1242,7 +1248,9 @@ static int 
vulkan_device_create_internal(AVHWDeviceContext *ctx,
 p->desc_buf_features.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT;
 p->desc_buf_features.pNext = >atomic_float_features;
 p->atomic_float_features.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT;
-p->atomic_float_features.pNext = NULL;
+p->atomic_float_features.pNext = >coop_matrix_features;
+p->coop_matrix_features.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR;
+p->coop_matrix_features.pNext = NULL;
 
 ctx->free = vulkan_device_free;
 
@@ -1304,6 +1312,8 @@ static int 
vulkan_device_create_internal(AVHWDeviceContext *ctx,
 p->atomic_float_features.shaderBufferFloat32Atomics = 
atomic_float_features.shaderBufferFloat32Atomics;
 p->atomic_float_features.shaderBufferFloat32AtomicAdd = 
atomic_float_features.shaderBufferFloat32AtomicAdd;
 
+p->coop_matrix_features.cooperativeMatrix = 
coop_matrix_features.cooperativeMatrix;
+
 dev_info.pNext = >device_features;
 
 /* Setup queue family */
diff --git a/libavutil/vulkan.c b/libavutil/vulkan.c
index 48f5f4b5dc..684b92de57 100644
--- a/libavutil/vulkan.c
+++ b/libavutil/vulkan.c
@@ -90,9 +90,13 @@ int ff_vk_load_pro

[FFmpeg-cvslog] hwcontext_vulkan: remove optional encode/decode extensions from the list

2023-02-06 Thread Lynne
ffmpeg | branch: release/5.0 | Lynne  | Sun Dec 25 01:03:30 2022 
+0100| [376a1ebfcb62da503acd9fb1bddcafd1d4a08e22] | committer: Jan Ekström

hwcontext_vulkan: remove optional encode/decode extensions from the list

They're not currently used, so they don't need to be there.
Vulkan stabilized the decode extensions less than a week ago, and their
name prefixes were changed from EXT to KHR. It's a bit too soon to be
depending on it, so rather than bumping, just remove these for now.

(cherry picked from commit eb0455d64690eed0068e5cb202f72ecdf899837c)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=376a1ebfcb62da503acd9fb1bddcafd1d4a08e22
---

 libavutil/hwcontext_vulkan.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 60a6cf6a91..82cae4c757 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -354,14 +354,6 @@ static const VulkanOptExtension optional_device_exts[] = {
 { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
FF_VK_EXT_EXTERNAL_WIN32_MEMORY  },
 { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, 
FF_VK_EXT_EXTERNAL_WIN32_SEM },
 #endif
-
-/* Video encoding/decoding */
-{ VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,  
FF_VK_EXT_NO_FLAG},
-{ VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,   
FF_VK_EXT_NO_FLAG},
-{ VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME,   
FF_VK_EXT_NO_FLAG},
-{ VK_EXT_VIDEO_ENCODE_H264_EXTENSION_NAME,
FF_VK_EXT_NO_FLAG},
-{ VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME,
FF_VK_EXT_NO_FLAG},
-{ VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME,
FF_VK_EXT_NO_FLAG},
 };
 
 /* Converts return values to strings */

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] hwcontext_vulkan: remove optional encode/decode extensions from the list

2023-02-06 Thread Lynne
ffmpeg | branch: release/5.1 | Lynne  | Sun Dec 25 01:03:30 2022 
+0100| [7268323193d55365f914de39fadd5dbdb1f68976] | committer: Jan Ekström

hwcontext_vulkan: remove optional encode/decode extensions from the list

They're not currently used, so they don't need to be there.
Vulkan stabilized the decode extensions less than a week ago, and their
name prefixes were changed from EXT to KHR. It's a bit too soon to be
depending on it, so rather than bumping, just remove these for now.

(cherry picked from commit eb0455d64690eed0068e5cb202f72ecdf899837c)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7268323193d55365f914de39fadd5dbdb1f68976
---

 libavutil/hwcontext_vulkan.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index 237caa4bc0..3bc0dc8a40 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -354,14 +354,6 @@ static const VulkanOptExtension optional_device_exts[] = {
 { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
FF_VK_EXT_EXTERNAL_WIN32_MEMORY  },
 { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, 
FF_VK_EXT_EXTERNAL_WIN32_SEM },
 #endif
-
-/* Video encoding/decoding */
-{ VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,  
FF_VK_EXT_NO_FLAG},
-{ VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,   
FF_VK_EXT_NO_FLAG},
-{ VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME,   
FF_VK_EXT_NO_FLAG},
-{ VK_EXT_VIDEO_ENCODE_H264_EXTENSION_NAME,
FF_VK_EXT_NO_FLAG},
-{ VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME,
FF_VK_EXT_NO_FLAG},
-{ VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME,
FF_VK_EXT_NO_FLAG},
 };
 
 /* Converts return values to strings */

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] x86: replace explicit REP_RETs with RETs

2023-01-31 Thread Lynne
ffmpeg | branch: master | Lynne  | Wed Feb  1 02:26:20 2023 
+0100| [bbe95f7353a972f28a48be8da883549f02c59e4b] | committer: Lynne

x86: replace explicit REP_RETs with RETs

From x86inc:
> On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either
> a branch or a branch target. So switch to a 2-byte form of ret in that case.
> We can automatically detect "follows a branch", but not a branch target.
> (SSSE3 is a sufficient condition to know that your cpu doesn't have this 
> problem.)

x86inc can automatically determine whether to use REP_RET rather than
RET in most of these cases, so impact is minimal. Additionally, a few
REP_RETs were used unnecessarily, despite the return being nowhere near a
branch.

The only CPUs affected were AMD K10s, made between 2007 and 2011, 16
years ago and 12 years ago, respectively.

In the future, everyone involved with x86inc should consider dropping
REP_RETs altogether.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bbe95f7353a972f28a48be8da883549f02c59e4b
---

 libavcodec/x86/aacpsdsp.asm | 10 +-
 libavcodec/x86/ac3dsp.asm   |  6 +++---
 libavcodec/x86/alacdsp.asm  |  4 ++--
 libavcodec/x86/audiodsp.asm |  2 +-
 libavcodec/x86/dirac_dwt.asm| 14 +++---
 libavcodec/x86/fft.asm  |  8 
 libavcodec/x86/flacdsp.asm  |  8 
 libavcodec/x86/h264_chromamc.asm| 18 +-
 libavcodec/x86/h264_chromamc_10bit.asm  | 10 +-
 libavcodec/x86/h264_deblock_10bit.asm   |  6 +++---
 libavcodec/x86/h264_idct.asm| 10 +-
 libavcodec/x86/h264_idct_10bit.asm  |  8 
 libavcodec/x86/h264_intrapred.asm   | 24 
 libavcodec/x86/h264_intrapred_10bit.asm | 16 
 libavcodec/x86/h264_qpel_10bit.asm  |  2 +-
 libavcodec/x86/h264_qpel_8bit.asm   | 26 +-
 libavcodec/x86/h264_weight.asm  | 12 ++--
 libavcodec/x86/h264_weight_10bit.asm| 12 ++--
 libavcodec/x86/hevc_sao.asm |  2 +-
 libavcodec/x86/hevc_sao_10bit.asm   |  2 +-
 libavcodec/x86/hpeldsp.asm  | 20 ++--
 libavcodec/x86/hpeldsp_vp3.asm  |  4 ++--
 libavcodec/x86/huffyuvdsp.asm   |  2 +-
 libavcodec/x86/jpeg2000dsp.asm  |  4 ++--
 libavcodec/x86/lossless_videodsp.asm|  2 +-
 libavcodec/x86/lossless_videoencdsp.asm |  2 +-
 libavcodec/x86/me_cmp.asm   |  2 +-
 libavcodec/x86/pngdsp.asm   |  2 +-
 libavcodec/x86/qpel.asm |  6 +++---
 libavcodec/x86/qpeldsp.asm  | 12 ++--
 libavcodec/x86/rv34dsp.asm  |  2 +-
 libavcodec/x86/rv40dsp.asm  | 10 +-
 libavcodec/x86/sbrdsp.asm   | 12 ++--
 libavcodec/x86/takdsp.asm   |  8 
 libavcodec/x86/utvideodsp.asm   |  4 ++--
 libavcodec/x86/v210.asm |  2 +-
 libavcodec/x86/vc1dsp_mc.asm|  2 +-
 libavcodec/x86/videodsp.asm |  2 +-
 libavcodec/x86/vp8dsp.asm   | 28 ++--
 libavfilter/x86/af_volume.asm   |  6 +++---
 libavfilter/x86/avf_showcqt.asm |  4 ++--
 libavfilter/x86/scene_sad.asm   |  2 +-
 libavfilter/x86/vf_blend.asm|  2 +-
 libavfilter/x86/vf_framerate.asm|  2 +-
 libavfilter/x86/vf_gradfun.asm  |  6 +++---
 libavfilter/x86/vf_hqdn3d.asm   |  2 +-
 libavfilter/x86/vf_interlace.asm|  6 +++---
 libavfilter/x86/vf_maskedmerge.asm  |  2 +-
 libavfilter/x86/vf_stereo3d.asm |  2 +-
 libavfilter/x86/vf_w3fdif.asm   | 10 +-
 libavutil/x86/float_dsp.asm | 18 +-
 libavutil/x86/lls.asm   |  4 ++--
 libswresample/x86/audio_convert.asm | 12 ++--
 libswresample/x86/rematrix.asm  |  8 
 libswscale/x86/input.asm| 14 +++---
 libswscale/x86/output.asm   | 10 +-
 libswscale/x86/scale.asm|  2 +-
 libswscale/x86/scale_avx2.asm   |  2 +-
 libswscale/x86/yuv2yuvX.asm |  2 +-
 libswscale/x86/yuv_2_rgb.asm|  2 +-
 tests/checkasm/x86/checkasm.asm |  2 +-
 61 files changed, 223 insertions(+), 223 deletions(-)

diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
index 105e1af5c5..cc496d4df8 100644
--- a/libavcodec/x86/aacpsdsp.asm
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -49,7 +49,7 @@ align 16
 add  dstq, mmsize
 addnq, mmsize*2
 jl .loop
-REP_RET
+RET
 %endmacro
 
 INIT_XMM sse
@@ -83,7 +83,7 @@ align 16
 add   src2q, mmsize
 add  nq, mmsize*2
 jl .loop
-REP_RET
+RET
 
 ;***
 ;void ff_ps_stereo_interpolate_sse3(float (*l)[2

[FFmpeg-cvslog] configure: update copyright year

2022-12-31 Thread Lynne
ffmpeg | branch: master | Lynne  | Sun Jan  1 00:00:00 2023 
+0100| [62da0b4a741a064f118a0eece496d6bcc437ec91] | committer: Lynne

configure: update copyright year

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=62da0b4a741a064f118a0eece496d6bcc437ec91
---

 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index f08cdab3d1..675dc84f56 100755
--- a/configure
+++ b/configure
@@ -7832,7 +7832,7 @@ cat > $TMPH <https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] hwcontext_vulkan: remove optional encode/decode extensions from the list

2022-12-24 Thread Lynne
ffmpeg | branch: master | Lynne  | Sun Dec 25 01:03:30 2022 
+0100| [eb0455d64690eed0068e5cb202f72ecdf899837c] | committer: Lynne

hwcontext_vulkan: remove optional encode/decode extensions from the list

They're not currently used, so they don't need to be there.
Vulkan stabilized the decode extensions less than a week ago, and their
name prefixes were changed from EXT to KHR. It's a bit too soon to be
depending on it, so rather than bumping, just remove these for now.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=eb0455d64690eed0068e5cb202f72ecdf899837c
---

 libavutil/hwcontext_vulkan.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/libavutil/hwcontext_vulkan.c b/libavutil/hwcontext_vulkan.c
index f1db1c7291..2a9b5f4aac 100644
--- a/libavutil/hwcontext_vulkan.c
+++ b/libavutil/hwcontext_vulkan.c
@@ -358,14 +358,6 @@ static const VulkanOptExtension optional_device_exts[] = {
 { VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME,
FF_VK_EXT_EXTERNAL_WIN32_MEMORY  },
 { VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, 
FF_VK_EXT_EXTERNAL_WIN32_SEM },
 #endif
-
-/* Video encoding/decoding */
-{ VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,  
FF_VK_EXT_NO_FLAG},
-{ VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,   
FF_VK_EXT_NO_FLAG},
-{ VK_KHR_VIDEO_ENCODE_QUEUE_EXTENSION_NAME,   
FF_VK_EXT_NO_FLAG},
-{ VK_EXT_VIDEO_ENCODE_H264_EXTENSION_NAME,
FF_VK_EXT_NO_FLAG},
-{ VK_EXT_VIDEO_DECODE_H264_EXTENSION_NAME,
FF_VK_EXT_NO_FLAG},
-{ VK_EXT_VIDEO_DECODE_H265_EXTENSION_NAME,
FF_VK_EXT_NO_FLAG},
 };
 
 /* Converts return values to strings */

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavu/tx: zero-out imaginary of last coefficient in forward RDFTs

2022-12-03 Thread Lynne
ffmpeg | branch: master | Lynne  | Sat Dec  3 20:16:19 2022 
+0100| [710d83bdde6d598c88e9696bc4b9f6ea4b84f541] | committer: Lynne

lavu/tx: zero-out imaginary of last coefficient in forward RDFTs

We didn't do this, because it's zero anyway, but it prevents users from using
uninitialized memory in calculations.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=710d83bdde6d598c88e9696bc4b9f6ea4b84f541
---

 libavutil/tx_template.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 56e4f6b04d..ac3dc22a2c 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -1688,7 +1688,7 @@ static void TX_NAME(ff_tx_rdft_ ##name)(AVTXContext *s, 
void *_dst,\
 } else {   
\
 /* Move [0].im to the last position, as convention requires */ 
\
 data[len2].re = data[0].im;
\
-data[   0].im = 0; 
\
+data[   0].im = data[len2].im = 0; 
\
 }  
\
 }
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] x86/tx_float: fix stray change in 15xM FFT and replace imul->lea

2022-11-28 Thread Lynne
ffmpeg | branch: master | Lynne  | Mon Nov 28 04:15:36 2022 
+0100| [90c17a05aab798199f3cdafb7cab61f666f132be] | committer: Hendrik Leppkes

x86/tx_float: fix stray change in 15xM FFT and replace imul->lea

Thanks to rorgoroth for bisecting and kurosu for the lea suggestion.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=90c17a05aab798199f3cdafb7cab61f666f132be
---

 libavutil/x86/tx_float.asm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavutil/x86/tx_float.asm b/libavutil/x86/tx_float.asm
index 2ad84c2885..e1533a8595 100644
--- a/libavutil/x86/tx_float.asm
+++ b/libavutil/x86/tx_float.asm
@@ -1772,7 +1772,7 @@ IMDCT_FN avx2
 %macro PFA_15_FN 2
 INIT_YMM %1
 %if %2
-cglobal fft_pfa_15xM_asm_float, 0, 8, 0, ctx, out, in, stride, len, lut, buf, 
map, tgt, tmp, \
+cglobal fft_pfa_15xM_asm_float, 0, 0, 0, ctx, out, in, stride, len, lut, buf, 
map, tgt, tmp, \
  tgt5, stride3, stride5, btmp
 %else
 cglobal fft_pfa_15xM_float, 4, 14, 16, 320, ctx, out, in, stride, len, lut, 
buf, map, tgt, tmp, \
@@ -1892,7 +1892,7 @@ cglobal fft_pfa_15xM_float, 4, 14, 16, 320, ctx, out, in, 
stride, len, lut, buf,
 mov stride5q, lenq
 mov tgt5q, btmpq
 POP strideq
-imul tmpq, strideq, 3
+lea tmpq, [strideq + 2*strideq]
 
 .post:
 LOAD64_LUT m0, inq, stride3q, 0, tmpq, m8, m9

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-cvslog] lavu: bump minor and add APIchanges entry for lavu/tx DCT

2022-11-24 Thread Lynne
ffmpeg | branch: master | Lynne  | Sun Nov 20 21:17:30 2022 
+0100| [e97368eba5b48a958d3b398780e56b12db92d1a1] | committer: Lynne

lavu: bump minor and add APIchanges entry for lavu/tx DCT

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e97368eba5b48a958d3b398780e56b12db92d1a1
---

 doc/APIchanges | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/APIchanges b/doc/APIchanges
index 038ca865ec..ab7ce15fae 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -14,6 +14,9 @@ libavutil: 2021-04-27
 
 API changes, most recent first:
 
+2022-11-xx - xx - lavu 57.43.100 - tx.h
+  Add AV_TX_FLOAT_DCT, AV_TX_DOUBLE_DCT and AV_TX_INT32_DCT.
+
 2022-xx-xx - xx - lavu 57.42.100 - dict.h
   Add av_dict_iterate().
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".


  1   2   3   4   5   >