[FFmpeg-devel] [PATCH v4 3/3] swscale/la: Add output_lasx.c file.

2022-09-08 Thread Hao Chen
ffmpeg -i 1_h264_1080p_30fps_3Mbps.mp4 -f rawvideo -s 640x480 -pix_fmt
rgb24 -y /dev/null -an
before: 150fps
after:  183fps

Signed-off-by: Hao Chen 
---
 libswscale/loongarch/Makefile |3 +-
 libswscale/loongarch/output_lasx.c| 1982 +
 libswscale/loongarch/swscale_init_loongarch.c |3 +
 libswscale/loongarch/swscale_loongarch.h  |6 +
 4 files changed, 1993 insertions(+), 1 deletion(-)
 create mode 100644 libswscale/loongarch/output_lasx.c

diff --git a/libswscale/loongarch/Makefile b/libswscale/loongarch/Makefile
index 4345971514..8e665e826c 100644
--- a/libswscale/loongarch/Makefile
+++ b/libswscale/loongarch/Makefile
@@ -2,4 +2,5 @@ OBJS-$(CONFIG_SWSCALE)  += 
loongarch/swscale_init_loongarch.o
 LASX-OBJS-$(CONFIG_SWSCALE) += loongarch/swscale_lasx.o \
loongarch/input_lasx.o   \
loongarch/yuv2rgb_lasx.o \
-   loongarch/rgb2rgb_lasx.o
+   loongarch/rgb2rgb_lasx.o \
+   loongarch/output_lasx.o
diff --git a/libswscale/loongarch/output_lasx.c 
b/libswscale/loongarch/output_lasx.c
new file mode 100644
index 00..36a4c4503b
--- /dev/null
+++ b/libswscale/loongarch/output_lasx.c
@@ -0,0 +1,1982 @@
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ * Contributed by Hao Chen(chen...@loongson.cn)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_loongarch.h"
+#include "libavutil/loongarch/loongson_intrinsics.h"
+
+void ff_yuv2planeX_8_lasx(const int16_t *filter, int filterSize,
+  const int16_t **src, uint8_t *dest, int dstW,
+  const uint8_t *dither, int offset)
+{
+int i;
+int len = dstW - 15;
+__m256i mask = {0x1C0C180814041000, 0x1C1814100C080400,
+0x1C0C180814041000, 0x1C1814100C080400};
+__m256i val1, val2, val3;
+uint8_t dither0 = dither[offset & 7];
+uint8_t dither1 = dither[(offset + 1) & 7];
+uint8_t dither2 = dither[(offset + 2) & 7];
+uint8_t dither3 = dither[(offset + 3) & 7];
+uint8_t dither4 = dither[(offset + 4) & 7];
+uint8_t dither5 = dither[(offset + 5) & 7];
+uint8_t dither6 = dither[(offset + 6) & 7];
+uint8_t dither7 = dither[(offset + 7) & 7];
+int val_1[8] = {dither0, dither2, dither4, dither6,
+dither0, dither2, dither4, dither6};
+int val_2[8] = {dither1, dither3, dither5, dither7,
+dither1, dither3, dither5, dither7};
+int val_3[8] = {dither0, dither1, dither2, dither3,
+dither4, dither5, dither6, dither7};
+
+DUP2_ARG2(__lasx_xvld, val_1, 0, val_2, 0, val1, val2);
+val3 = __lasx_xvld(val_3, 0);
+
+for (i = 0; i < len; i += 16) {
+int j;
+__m256i src0, filter0, val;
+__m256i val_ev, val_od;
+
+val_ev = __lasx_xvslli_w(val1, 12);
+val_od = __lasx_xvslli_w(val2, 12);
+
+for (j = 0; j < filterSize; j++) {
+src0  = __lasx_xvld(src[j]+ i, 0);
+filter0 = __lasx_xvldrepl_h((filter + j), 0);
+val_ev = __lasx_xvmaddwev_w_h(val_ev, src0, filter0);
+val_od = __lasx_xvmaddwod_w_h(val_od, src0, filter0);
+}
+val_ev = __lasx_xvsrai_w(val_ev, 19);
+val_od = __lasx_xvsrai_w(val_od, 19);
+val_ev = __lasx_xvclip255_w(val_ev);
+val_od = __lasx_xvclip255_w(val_od);
+val= __lasx_xvshuf_b(val_od, val_ev, mask);
+__lasx_xvstelm_d(val, (dest + i), 0, 0);
+__lasx_xvstelm_d(val, (dest + i), 8, 2);
+}
+if (dstW - i >= 8){
+int j;
+__m256i src0, filter0, val_h;
+__m256i val_l;
+
+val_l = __lasx_xvslli_w(val3, 12);
+
+for (j = 0; j < filterSize; j++) {
+src0  = __lasx_xvld(src[j] + i, 0);
+src0  = __lasx_vext2xv_w_h(src0);
+filter0 = __lasx_xvldrepl_h((filter + j), 0);
+filter0 = __lasx_vext2xv_w_h(filter0);
+val_l = __lasx_xvmadd_w(val_l, src0, filter0);
+}
+val_l = __lasx_xvsrai_w(val_l, 19);
+val_l = __lasx_xvclip255_w(val_l);
+val_h = 

[FFmpeg-devel] [PATCH v4 1/3] swscale/la: Optimize hscale functions with lasx.

2022-09-08 Thread Hao Chen
ffmpeg -i 1_h264_1080p_30fps_3Mbps.mp4 -f rawvideo -s 640x480 -y /dev/null -an
before: 101fps
after:  138fps

Signed-off-by: Hao Chen 
---
 libswscale/loongarch/Makefile |   3 +
 libswscale/loongarch/input_lasx.c | 202 
 libswscale/loongarch/swscale_init_loongarch.c |  50 +
 libswscale/loongarch/swscale_lasx.c   | 972 ++
 libswscale/loongarch/swscale_loongarch.h  |  50 +
 libswscale/swscale.c  |   2 +
 libswscale/swscale_internal.h |   2 +
 libswscale/utils.c|  13 +-
 8 files changed, 1293 insertions(+), 1 deletion(-)
 create mode 100644 libswscale/loongarch/Makefile
 create mode 100644 libswscale/loongarch/input_lasx.c
 create mode 100644 libswscale/loongarch/swscale_init_loongarch.c
 create mode 100644 libswscale/loongarch/swscale_lasx.c
 create mode 100644 libswscale/loongarch/swscale_loongarch.h

diff --git a/libswscale/loongarch/Makefile b/libswscale/loongarch/Makefile
new file mode 100644
index 00..586a1717b6
--- /dev/null
+++ b/libswscale/loongarch/Makefile
@@ -0,0 +1,3 @@
+OBJS-$(CONFIG_SWSCALE)  += loongarch/swscale_init_loongarch.o
+LASX-OBJS-$(CONFIG_SWSCALE) += loongarch/swscale_lasx.o \
+   loongarch/input_lasx.o   \
diff --git a/libswscale/loongarch/input_lasx.c 
b/libswscale/loongarch/input_lasx.c
new file mode 100644
index 00..4830072eaf
--- /dev/null
+++ b/libswscale/loongarch/input_lasx.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ * Contributed by Hao Chen(chen...@loongson.cn)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_loongarch.h"
+#include "libavutil/loongarch/loongson_intrinsics.h"
+
+void planar_rgb_to_uv_lasx(uint8_t *_dstU, uint8_t *_dstV, const uint8_t 
*src[4],
+   int width, int32_t *rgb2yuv, void *opq)
+{
+int i;
+uint16_t *dstU   = (uint16_t *)_dstU;
+uint16_t *dstV   = (uint16_t *)_dstV;
+int set  = 0x4001 << (RGB2YUV_SHIFT - 7);
+int len  = width - 15;
+int32_t tem_ru   = rgb2yuv[RU_IDX], tem_gu = rgb2yuv[GU_IDX];
+int32_t tem_bu = rgb2yuv[BU_IDX], tem_rv   = rgb2yuv[RV_IDX];
+int32_t tem_gv = rgb2yuv[GV_IDX], tem_bv = rgb2yuv[BV_IDX];
+int shift= RGB2YUV_SHIFT - 6;
+const uint8_t *src0 = src[0], *src1 = src[1], *src2 = src[2];
+__m256i ru, gu, bu, rv, gv, bv;
+__m256i mask = {0x0D0C090805040100, 0x1D1C191815141110,
+0x0D0C090805040100, 0x1D1C191815141110};
+__m256i temp = __lasx_xvreplgr2vr_w(set);
+__m256i sra  = __lasx_xvreplgr2vr_w(shift);
+
+ru = __lasx_xvreplgr2vr_w(tem_ru);
+gu = __lasx_xvreplgr2vr_w(tem_gu);
+bu = __lasx_xvreplgr2vr_w(tem_bu);
+rv = __lasx_xvreplgr2vr_w(tem_rv);
+gv = __lasx_xvreplgr2vr_w(tem_gv);
+bv = __lasx_xvreplgr2vr_w(tem_bv);
+for (i = 0; i < len; i += 16) {
+__m256i _g, _b, _r;
+__m256i g_l, g_h, b_l, b_h, r_l, r_h;
+__m256i v_l, v_h, u_l, u_h, u_lh, v_lh;
+
+_g  = __lasx_xvldx(src0, i);
+_b  = __lasx_xvldx(src1, i);
+_r  = __lasx_xvldx(src2, i);
+g_l = __lasx_vext2xv_wu_bu(_g);
+b_l = __lasx_vext2xv_wu_bu(_b);
+r_l = __lasx_vext2xv_wu_bu(_r);
+_g  = __lasx_xvpermi_d(_g, 0x01);
+_b  = __lasx_xvpermi_d(_b, 0x01);
+_r  = __lasx_xvpermi_d(_r, 0x01);
+g_h = __lasx_vext2xv_wu_bu(_g);
+b_h = __lasx_vext2xv_wu_bu(_b);
+r_h = __lasx_vext2xv_wu_bu(_r);
+u_l  = __lasx_xvmadd_w(temp, ru, r_l);
+u_h  = __lasx_xvmadd_w(temp, ru, r_h);
+v_l  = __lasx_xvmadd_w(temp, rv, r_l);
+v_h  = __lasx_xvmadd_w(temp, rv, r_h);
+u_l  = __lasx_xvmadd_w(u_l, gu, g_l);
+u_l  = __lasx_xvmadd_w(u_l, bu, b_l);
+u_h  = __lasx_xvmadd_w(u_h, gu, g_h);
+u_h  = __lasx_xvmadd_w(u_h, bu, b_h);
+v_l  = __lasx_xvmadd_w(v_l, gv, g_l);
+v_l  = __lasx_xvmadd_w(v_l, bv, b_l);
+v_h  = __lasx_xvmadd_w(v_h, gv, g_h);
+v_h  = __lasx_xvmadd_w(v_h, bv, b_h);
+u_l  = __lasx_xvsra_w(u_l, sra);
+u_h  = __lasx_xvsra_w(u_h, sra);
+v_l  = 

[FFmpeg-devel] Add LoongArch SIMD optimization in swscale lib.

2022-09-08 Thread Hao Chen
v2: Some modifications were made according to the comments of the reviewers.
v3: Update and run CI test again.
v4: Resolve the warning for the build.

[PATCH v4 1/3] swscale/la: Optimize hscale functions with lasx.
[PATCH v4 2/3] swscale/la: Add yuv2rgb_lasx.c and rgb2rgb_lasx.c
[PATCH v4 3/3] swscale/la: Add output_lasx.c file.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v4 2/3] swscale/la: Add yuv2rgb_lasx.c and rgb2rgb_lasx.c files

2022-09-08 Thread Hao Chen
ffmpeg -i 1_h264_1080p_30fps_3Mbps.mp4 -f rawvideo -pix_fmt rgb24 -y /dev/null 
-an
before: 178fps
after:  210fps

Signed-off-by: Hao Chen 
---
 libswscale/loongarch/Makefile |   2 +
 libswscale/loongarch/rgb2rgb_lasx.c   |  52 +++
 libswscale/loongarch/swscale_init_loongarch.c |  42 +++
 libswscale/loongarch/swscale_loongarch.h  |  22 ++
 libswscale/loongarch/yuv2rgb_lasx.c   | 321 ++
 libswscale/rgb2rgb.c  |   2 +
 libswscale/rgb2rgb.h  |   1 +
 libswscale/yuv2rgb.c  |   2 +
 8 files changed, 444 insertions(+)
 create mode 100644 libswscale/loongarch/rgb2rgb_lasx.c
 create mode 100644 libswscale/loongarch/yuv2rgb_lasx.c

diff --git a/libswscale/loongarch/Makefile b/libswscale/loongarch/Makefile
index 586a1717b6..4345971514 100644
--- a/libswscale/loongarch/Makefile
+++ b/libswscale/loongarch/Makefile
@@ -1,3 +1,5 @@
 OBJS-$(CONFIG_SWSCALE)  += loongarch/swscale_init_loongarch.o
 LASX-OBJS-$(CONFIG_SWSCALE) += loongarch/swscale_lasx.o \
loongarch/input_lasx.o   \
+   loongarch/yuv2rgb_lasx.o \
+   loongarch/rgb2rgb_lasx.o
diff --git a/libswscale/loongarch/rgb2rgb_lasx.c 
b/libswscale/loongarch/rgb2rgb_lasx.c
new file mode 100644
index 00..1b6be90217
--- /dev/null
+++ b/libswscale/loongarch/rgb2rgb_lasx.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 Loongson Technology Corporation Limited
+ * Contributed by Hao Chen(chen...@loongson.cn)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_loongarch.h"
+#include "libavutil/loongarch/loongson_intrinsics.h"
+
+void ff_interleave_bytes_lasx(const uint8_t *src1, const uint8_t *src2,
+  uint8_t *dest, int width, int height,
+  int src1Stride, int src2Stride, int dstStride)
+{
+int h;
+int len = width & (0xFFF0);
+
+for (h = 0; h < height; h++) {
+int w, index = 0;
+__m256i src_1, src_2, dst;
+
+for (w = 0; w < len; w += 16) {
+DUP2_ARG2(__lasx_xvld, src1 + w, 0, src2 + w, 0, src_1, src_2);
+src_1 = __lasx_xvpermi_d(src_1, 0xD8);
+src_2 = __lasx_xvpermi_d(src_2, 0xD8);
+dst   = __lasx_xvilvl_b(src_2, src_1);
+__lasx_xvst(dst, dest + index, 0);
+index  += 32;
+}
+for (; w < width; w++) {
+dest[(w << 1) + 0] = src1[w];
+dest[(w << 1) + 1] = src2[w];
+}
+dest += dstStride;
+src1 += src1Stride;
+src2 += src2Stride;
+}
+}
diff --git a/libswscale/loongarch/swscale_init_loongarch.c 
b/libswscale/loongarch/swscale_init_loongarch.c
index 197dc6e1e7..1e0bb1b116 100644
--- a/libswscale/loongarch/swscale_init_loongarch.c
+++ b/libswscale/loongarch/swscale_init_loongarch.c
@@ -21,6 +21,7 @@
 
 #include "swscale_loongarch.h"
 #include "libswscale/swscale_internal.h"
+#include "libswscale/rgb2rgb.h"
 #include "libavutil/loongarch/cpu.h"
 
 av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
@@ -48,3 +49,44 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
 }
 }
 }
+
+av_cold void rgb2rgb_init_loongarch(void)
+{
+int cpu_flags = av_get_cpu_flags();
+if (have_lasx(cpu_flags))
+interleaveBytes = ff_interleave_bytes_lasx;
+}
+
+av_cold SwsFunc ff_yuv2rgb_init_loongarch(SwsContext *c)
+{
+int cpu_flags = av_get_cpu_flags();
+if (have_lasx(cpu_flags)) {
+switch (c->dstFormat) {
+case AV_PIX_FMT_RGB24:
+return yuv420_rgb24_lasx;
+case AV_PIX_FMT_BGR24:
+return yuv420_bgr24_lasx;
+case AV_PIX_FMT_RGBA:
+if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) {
+break;
+} else
+return yuv420_rgba32_lasx;
+case AV_PIX_FMT_ARGB:
+if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) {
+break;
+} else
+return yuv420_argb32_lasx;
+case AV_PIX_FMT_BGRA:
+if (CONFIG_SWSCALE_ALPHA && 

Re: [FFmpeg-devel] [PATCH 2/2] libavformat/hls: Free keys

2022-09-08 Thread Steven Liu
Michael Niedermayer  于2022年9月9日周五 06:45写道:
>
> Fixes: memleak
> Fixes: 
> 50703/clusterfuzz-testcase-minimized-ffmpeg_dem_HLS_fuzzer-6399058578636800
>
> Found-by: continuous fuzzing process 
> https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
> Signed-off-by: Michael Niedermayer 
> ---
>  libavformat/hls.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/libavformat/hls.c b/libavformat/hls.c
> index 3dc7bd39309..e622425e801 100644
> --- a/libavformat/hls.c
> +++ b/libavformat/hls.c
> @@ -250,6 +250,7 @@ static void free_init_section_list(struct playlist *pls)
>  {
>  int i;
>  for (i = 0; i < pls->n_init_sections; i++) {
> +av_freep(&pls->init_sections[i]->key);
>  av_freep(&pls->init_sections[i]->url);
>  av_freep(&pls->init_sections[i]);
>  }
> --
> 2.17.1
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

lgtm

Thanks
Steven
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v3 3/3] swscale/la: Add output_lasx.c file.

2022-09-08 Thread Hao Chen
ffmpeg -i 1_h264_1080p_30fps_3Mbps.mp4 -f rawvideo -s 640x480 -pix_fmt
rgb24 -y /dev/null -an
before: 150fps
after:  183fps

Signed-off-by: Hao Chen 
---
 libswscale/loongarch/Makefile |3 +-
 libswscale/loongarch/output_lasx.c| 1982 +
 libswscale/loongarch/swscale_init_loongarch.c |3 +
 libswscale/loongarch/swscale_loongarch.h  |6 +
 4 files changed, 1993 insertions(+), 1 deletion(-)
 create mode 100644 libswscale/loongarch/output_lasx.c

diff --git a/libswscale/loongarch/Makefile b/libswscale/loongarch/Makefile
index 4345971514..8e665e826c 100644
--- a/libswscale/loongarch/Makefile
+++ b/libswscale/loongarch/Makefile
@@ -2,4 +2,5 @@ OBJS-$(CONFIG_SWSCALE)  += 
loongarch/swscale_init_loongarch.o
 LASX-OBJS-$(CONFIG_SWSCALE) += loongarch/swscale_lasx.o \
loongarch/input_lasx.o   \
loongarch/yuv2rgb_lasx.o \
-   loongarch/rgb2rgb_lasx.o
+   loongarch/rgb2rgb_lasx.o \
+   loongarch/output_lasx.o
diff --git a/libswscale/loongarch/output_lasx.c 
b/libswscale/loongarch/output_lasx.c
new file mode 100644
index 00..ee741e210d
--- /dev/null
+++ b/libswscale/loongarch/output_lasx.c
@@ -0,0 +1,1982 @@
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ * Contributed by Hao Chen(chen...@loongson.cn)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_loongarch.h"
+#include "libavutil/loongarch/loongson_intrinsics.h"
+
+void ff_yuv2planeX_8_lasx(const int16_t *filter, int filterSize,
+  const int16_t **src, uint8_t *dest, int dstW,
+  const uint8_t *dither, int offset)
+{
+int i;
+int len = dstW - 15;
+__m256i mask = {0x1C0C180814041000, 0x1C1814100C080400,
+0x1C0C180814041000, 0x1C1814100C080400};
+__m256i val1, val2, val3;
+uint8_t dither0 = dither[offset & 7];
+uint8_t dither1 = dither[(offset + 1) & 7];
+uint8_t dither2 = dither[(offset + 2) & 7];
+uint8_t dither3 = dither[(offset + 3) & 7];
+uint8_t dither4 = dither[(offset + 4) & 7];
+uint8_t dither5 = dither[(offset + 5) & 7];
+uint8_t dither6 = dither[(offset + 6) & 7];
+uint8_t dither7 = dither[(offset + 7) & 7];
+int val_1[8] = {dither0, dither2, dither4, dither6,
+dither0, dither2, dither4, dither6};
+int val_2[8] = {dither1, dither3, dither5, dither7,
+dither1, dither3, dither5, dither7};
+int val_3[8] = {dither0, dither1, dither2, dither3,
+dither4, dither5, dither6, dither7};
+
+DUP2_ARG2(__lasx_xvld, val_1, 0, val_2, 0, val1, val2);
+val3 = __lasx_xvld(val_3, 0);
+
+for (i = 0; i < len; i += 16) {
+int j;
+__m256i src0, filter0, val;
+__m256i val_ev, val_od;
+
+val_ev = __lasx_xvslli_w(val1, 12);
+val_od = __lasx_xvslli_w(val2, 12);
+
+for (j = 0; j < filterSize; j++) {
+src0  = __lasx_xvld(src[j]+ i, 0);
+filter0 = __lasx_xvldrepl_h((filter + j), 0);
+val_ev = __lasx_xvmaddwev_w_h(val_ev, src0, filter0);
+val_od = __lasx_xvmaddwod_w_h(val_od, src0, filter0);
+}
+val_ev = __lasx_xvsrai_w(val_ev, 19);
+val_od = __lasx_xvsrai_w(val_od, 19);
+val_ev = __lasx_xvclip255_w(val_ev);
+val_od = __lasx_xvclip255_w(val_od);
+val= __lasx_xvshuf_b(val_od, val_ev, mask);
+__lasx_xvstelm_d(val, (dest + i), 0, 0);
+__lasx_xvstelm_d(val, (dest + i), 8, 2);
+}
+if (dstW - i >= 8){
+int j;
+__m256i src0, filter0, val_h;
+__m256i val_l;
+
+val_l = __lasx_xvslli_w(val3, 12);
+
+for (j = 0; j < filterSize; j++) {
+src0  = __lasx_xvld(src[j] + i, 0);
+src0  = __lasx_vext2xv_w_h(src0);
+filter0 = __lasx_xvldrepl_h((filter + j), 0);
+filter0 = __lasx_vext2xv_w_h(filter0);
+val_l = __lasx_xvmadd_w(val_l, src0, filter0);
+}
+val_l = __lasx_xvsrai_w(val_l, 19);
+val_l = __lasx_xvclip255_w(val_l);
+val_h = 

[FFmpeg-devel] [PATCH v3 2/3] swscale/la: Add yuv2rgb_lasx.c and rgb2rgb_lasx.c files

2022-09-08 Thread Hao Chen
ffmpeg -i 1_h264_1080p_30fps_3Mbps.mp4 -f rawvideo -pix_fmt rgb24 -y /dev/null 
-an
before: 178fps
after:  210fps

Signed-off-by: Hao Chen 
---
 libswscale/loongarch/Makefile |   2 +
 libswscale/loongarch/rgb2rgb_lasx.c   |  52 +++
 libswscale/loongarch/swscale_init_loongarch.c |  42 +++
 libswscale/loongarch/swscale_loongarch.h  |  22 ++
 libswscale/loongarch/yuv2rgb_lasx.c   | 321 ++
 libswscale/rgb2rgb.c  |   2 +
 libswscale/rgb2rgb.h  |   1 +
 libswscale/yuv2rgb.c  |   2 +
 8 files changed, 444 insertions(+)
 create mode 100644 libswscale/loongarch/rgb2rgb_lasx.c
 create mode 100644 libswscale/loongarch/yuv2rgb_lasx.c

diff --git a/libswscale/loongarch/Makefile b/libswscale/loongarch/Makefile
index 586a1717b6..4345971514 100644
--- a/libswscale/loongarch/Makefile
+++ b/libswscale/loongarch/Makefile
@@ -1,3 +1,5 @@
 OBJS-$(CONFIG_SWSCALE)  += loongarch/swscale_init_loongarch.o
 LASX-OBJS-$(CONFIG_SWSCALE) += loongarch/swscale_lasx.o \
loongarch/input_lasx.o   \
+   loongarch/yuv2rgb_lasx.o \
+   loongarch/rgb2rgb_lasx.o
diff --git a/libswscale/loongarch/rgb2rgb_lasx.c 
b/libswscale/loongarch/rgb2rgb_lasx.c
new file mode 100644
index 00..1b6be90217
--- /dev/null
+++ b/libswscale/loongarch/rgb2rgb_lasx.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 Loongson Technology Corporation Limited
+ * Contributed by Hao Chen(chen...@loongson.cn)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_loongarch.h"
+#include "libavutil/loongarch/loongson_intrinsics.h"
+
+void ff_interleave_bytes_lasx(const uint8_t *src1, const uint8_t *src2,
+  uint8_t *dest, int width, int height,
+  int src1Stride, int src2Stride, int dstStride)
+{
+int h;
+int len = width & (0xFFF0);
+
+for (h = 0; h < height; h++) {
+int w, index = 0;
+__m256i src_1, src_2, dst;
+
+for (w = 0; w < len; w += 16) {
+DUP2_ARG2(__lasx_xvld, src1 + w, 0, src2 + w, 0, src_1, src_2);
+src_1 = __lasx_xvpermi_d(src_1, 0xD8);
+src_2 = __lasx_xvpermi_d(src_2, 0xD8);
+dst   = __lasx_xvilvl_b(src_2, src_1);
+__lasx_xvst(dst, dest + index, 0);
+index  += 32;
+}
+for (; w < width; w++) {
+dest[(w << 1) + 0] = src1[w];
+dest[(w << 1) + 1] = src2[w];
+}
+dest += dstStride;
+src1 += src1Stride;
+src2 += src2Stride;
+}
+}
diff --git a/libswscale/loongarch/swscale_init_loongarch.c 
b/libswscale/loongarch/swscale_init_loongarch.c
index 197dc6e1e7..1e0bb1b116 100644
--- a/libswscale/loongarch/swscale_init_loongarch.c
+++ b/libswscale/loongarch/swscale_init_loongarch.c
@@ -21,6 +21,7 @@
 
 #include "swscale_loongarch.h"
 #include "libswscale/swscale_internal.h"
+#include "libswscale/rgb2rgb.h"
 #include "libavutil/loongarch/cpu.h"
 
 av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
@@ -48,3 +49,44 @@ av_cold void ff_sws_init_swscale_loongarch(SwsContext *c)
 }
 }
 }
+
+av_cold void rgb2rgb_init_loongarch(void)
+{
+int cpu_flags = av_get_cpu_flags();
+if (have_lasx(cpu_flags))
+interleaveBytes = ff_interleave_bytes_lasx;
+}
+
+av_cold SwsFunc ff_yuv2rgb_init_loongarch(SwsContext *c)
+{
+int cpu_flags = av_get_cpu_flags();
+if (have_lasx(cpu_flags)) {
+switch (c->dstFormat) {
+case AV_PIX_FMT_RGB24:
+return yuv420_rgb24_lasx;
+case AV_PIX_FMT_BGR24:
+return yuv420_bgr24_lasx;
+case AV_PIX_FMT_RGBA:
+if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) {
+break;
+} else
+return yuv420_rgba32_lasx;
+case AV_PIX_FMT_ARGB:
+if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat)) {
+break;
+} else
+return yuv420_argb32_lasx;
+case AV_PIX_FMT_BGRA:
+if (CONFIG_SWSCALE_ALPHA && 

[FFmpeg-devel] [PATCH v3 1/3] swscale/la: Optimize hscale functions with lasx.

2022-09-08 Thread Hao Chen
ffmpeg -i 1_h264_1080p_30fps_3Mbps.mp4 -f rawvideo -s 640x480 -y /dev/null -an
before: 101fps
after:  138fps

Signed-off-by: Hao Chen 
---
 libswscale/loongarch/Makefile |   3 +
 libswscale/loongarch/input_lasx.c | 202 
 libswscale/loongarch/swscale_init_loongarch.c |  50 +
 libswscale/loongarch/swscale_lasx.c   | 972 ++
 libswscale/loongarch/swscale_loongarch.h  |  50 +
 libswscale/swscale.c  |   2 +
 libswscale/swscale_internal.h |   2 +
 libswscale/utils.c|  13 +-
 8 files changed, 1293 insertions(+), 1 deletion(-)
 create mode 100644 libswscale/loongarch/Makefile
 create mode 100644 libswscale/loongarch/input_lasx.c
 create mode 100644 libswscale/loongarch/swscale_init_loongarch.c
 create mode 100644 libswscale/loongarch/swscale_lasx.c
 create mode 100644 libswscale/loongarch/swscale_loongarch.h

diff --git a/libswscale/loongarch/Makefile b/libswscale/loongarch/Makefile
new file mode 100644
index 00..586a1717b6
--- /dev/null
+++ b/libswscale/loongarch/Makefile
@@ -0,0 +1,3 @@
+OBJS-$(CONFIG_SWSCALE)  += loongarch/swscale_init_loongarch.o
+LASX-OBJS-$(CONFIG_SWSCALE) += loongarch/swscale_lasx.o \
+   loongarch/input_lasx.o   \
diff --git a/libswscale/loongarch/input_lasx.c 
b/libswscale/loongarch/input_lasx.c
new file mode 100644
index 00..c3060ea6a3
--- /dev/null
+++ b/libswscale/loongarch/input_lasx.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ * Contributed by Hao Chen(chen...@loongson.cn)
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "swscale_loongarch.h"
+#include "libavutil/loongarch/loongson_intrinsics.h"
+
+void planar_rgb_to_uv_lasx(uint8_t *_dstU, uint8_t *_dstV, const uint8_t 
*src[4],
+   int width, int32_t *rgb2yuv)
+{
+int i;
+uint16_t *dstU   = (uint16_t *)_dstU;
+uint16_t *dstV   = (uint16_t *)_dstV;
+int set  = 0x4001 << (RGB2YUV_SHIFT - 7);
+int len  = width - 15;
+int32_t tem_ru   = rgb2yuv[RU_IDX], tem_gu = rgb2yuv[GU_IDX];
+int32_t tem_bu = rgb2yuv[BU_IDX], tem_rv   = rgb2yuv[RV_IDX];
+int32_t tem_gv = rgb2yuv[GV_IDX], tem_bv = rgb2yuv[BV_IDX];
+int shift= RGB2YUV_SHIFT - 6;
+const uint8_t *src0 = src[0], *src1 = src[1], *src2 = src[2];
+__m256i ru, gu, bu, rv, gv, bv;
+__m256i mask = {0x0D0C090805040100, 0x1D1C191815141110,
+0x0D0C090805040100, 0x1D1C191815141110};
+__m256i temp = __lasx_xvreplgr2vr_w(set);
+__m256i sra  = __lasx_xvreplgr2vr_w(shift);
+
+ru = __lasx_xvreplgr2vr_w(tem_ru);
+gu = __lasx_xvreplgr2vr_w(tem_gu);
+bu = __lasx_xvreplgr2vr_w(tem_bu);
+rv = __lasx_xvreplgr2vr_w(tem_rv);
+gv = __lasx_xvreplgr2vr_w(tem_gv);
+bv = __lasx_xvreplgr2vr_w(tem_bv);
+for (i = 0; i < len; i += 16) {
+__m256i _g, _b, _r;
+__m256i g_l, g_h, b_l, b_h, r_l, r_h;
+__m256i v_l, v_h, u_l, u_h, u_lh, v_lh;
+
+_g  = __lasx_xvldx(src0, i);
+_b  = __lasx_xvldx(src1, i);
+_r  = __lasx_xvldx(src2, i);
+g_l = __lasx_vext2xv_wu_bu(_g);
+b_l = __lasx_vext2xv_wu_bu(_b);
+r_l = __lasx_vext2xv_wu_bu(_r);
+_g  = __lasx_xvpermi_d(_g, 0x01);
+_b  = __lasx_xvpermi_d(_b, 0x01);
+_r  = __lasx_xvpermi_d(_r, 0x01);
+g_h = __lasx_vext2xv_wu_bu(_g);
+b_h = __lasx_vext2xv_wu_bu(_b);
+r_h = __lasx_vext2xv_wu_bu(_r);
+u_l  = __lasx_xvmadd_w(temp, ru, r_l);
+u_h  = __lasx_xvmadd_w(temp, ru, r_h);
+v_l  = __lasx_xvmadd_w(temp, rv, r_l);
+v_h  = __lasx_xvmadd_w(temp, rv, r_h);
+u_l  = __lasx_xvmadd_w(u_l, gu, g_l);
+u_l  = __lasx_xvmadd_w(u_l, bu, b_l);
+u_h  = __lasx_xvmadd_w(u_h, gu, g_h);
+u_h  = __lasx_xvmadd_w(u_h, bu, b_h);
+v_l  = __lasx_xvmadd_w(v_l, gv, g_l);
+v_l  = __lasx_xvmadd_w(v_l, bv, b_l);
+v_h  = __lasx_xvmadd_w(v_h, gv, g_h);
+v_h  = __lasx_xvmadd_w(v_h, bv, b_h);
+u_l  = __lasx_xvsra_w(u_l, sra);
+u_h  = __lasx_xvsra_w(u_h, sra);
+v_l  = __lasx_xvsra_w(v_l, sra);
+   

[FFmpeg-devel] Add LoongArch SIMD optimization in swscale lib.

2022-09-08 Thread Hao Chen
v2: Some modifications were made according to the comments of the reviewers.
v3: Update and run CI test again.

[PATCH v3 1/3] swscale/la: Optimize hscale functions with lasx.
[PATCH v3 2/3] swscale/la: Add yuv2rgb_lasx.c and rgb2rgb_lasx.c
[PATCH v3 3/3] swscale/la: Add output_lasx.c file.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v1 3/3] swscale/la: Add output_lasx.c file.

2022-09-08 Thread Shiyou Yin


> 2022年9月6日 16:12,Shiyou Yin  写道:
> 
>> 
>> 2022年8月29日 20:30,Andreas Rheinhardt > > 写道:
>> 
>> Hao Chen:
>>> ffmpeg -i ~/media/1_h264_1080p_30fps_3Mbps.mp4 -f rawvideo -s 640x480 
>>> -pix_fmt
>>> rgb24 -y /dev/null -an
>>> before: 150fps
>>> after: 183fps
>>> 
>>> Signed-off-by: Hao Chen 
>>> ---
>>> libswscale/loongarch/Makefile | 3 +-
>>> libswscale/loongarch/output_lasx.c | 1982 +
>>> libswscale/loongarch/swscale_init_loongarch.c | 3 +
>>> libswscale/loongarch/swscale_loongarch.h | 6 +
>>> 4 files changed, 1993 insertions(+), 1 deletion(-)
>>> create mode 100644 libswscale/loongarch/output_lasx.c
>>> 

>>> +static void
>>> +yuv2rgb_2_template_lasx(SwsContext *c, const int16_t *buf[2],
>>> + const int16_t *ubuf[2], const int16_t *vbuf[2],
>>> + const int16_t *abuf[2], uint8_t *dest, int dstW,
>>> + int yalpha, int uvalpha, int y,
>>> + enum AVPixelFormat target, int hasAlpha)
>>> +{
>>> + const int16_t *buf0 = buf[0], *buf1 = buf[1],
>>> + *ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
>>> + *vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
>>> + int yalpha1 = 4096 - yalpha;
>>> + int uvalpha1 = 4096 - uvalpha;
>>> + int i, count = 0;
>>> + int len = dstW - 15;
>>> + int len_count = (dstW + 1) >> 1;
>>> + const void *r, *g, *b;
>>> + int head = YUVRGB_TABLE_HEADROOM;
>>> + __m256i v_yalpha1 = __lasx_xvreplgr2vr_w(yalpha1);
>>> + __m256i v_uvalpha1 = __lasx_xvreplgr2vr_w(uvalpha1);
>>> + __m256i v_yalpha = __lasx_xvreplgr2vr_w(yalpha);
>>> + __m256i v_uvalpha = __lasx_xvreplgr2vr_w(uvalpha);
>>> + __m256i headroom = __lasx_xvreplgr2vr_w(head);
>>> +
>>> + for (i = 0; i < len; i += 16) {
>>> + int Y1, Y2, U, V;
>>> + int i_dex = i << 1;
>>> + int c_dex = count << 1;
>>> + __m256i y0_h, y0_l, y0, u0, v0;
>>> + __m256i y1_h, y1_l, y1, u1, v1;
>>> + __m256i y_l, y_h, u, v;
>>> +
>>> + DUP4_ARG2(__lasx_xvldx, buf0, i_dex, ubuf0, c_dex, vbuf0, c_dex,
>>> + buf1, i_dex, y0, u0, v0, y1);
>>> + DUP2_ARG2(__lasx_xvldx, ubuf1, c_dex, vbuf1, c_dex, u1, v1);
>>> + DUP2_ARG2(__lasx_xvsllwil_w_h, y0, 0, y1, 0, y0_l, y1_l);
>>> + DUP2_ARG1(__lasx_xvexth_w_h, y0, y1, y0_h, y1_h);
>>> + DUP4_ARG1(__lasx_vext2xv_w_h, u0, u1, v0, v1, u0, u1, v0, v1);
>>> + y0_l = __lasx_xvmul_w(y0_l, v_yalpha1);
>>> + y0_h = __lasx_xvmul_w(y0_h, v_yalpha1);
>>> + u0 = __lasx_xvmul_w(u0, v_uvalpha1);
>>> + v0 = __lasx_xvmul_w(v0, v_uvalpha1);
>>> + y_l = __lasx_xvmadd_w(y0_l, v_yalpha, y1_l);
>>> + y_h = __lasx_xvmadd_w(y0_h, v_yalpha, y1_h);
>>> + u = __lasx_xvmadd_w(u0, v_uvalpha, u1);
>>> + v = __lasx_xvmadd_w(v0, v_uvalpha, v1);
>>> + y_l = __lasx_xvsrai_w(y_l, 19);
>>> + y_h = __lasx_xvsrai_w(y_h, 19);
>>> + u = __lasx_xvsrai_w(u, 19);
>>> + v = __lasx_xvsrai_w(v, 19);
>>> + u = __lasx_xvadd_w(u, headroom);
>>> + v = __lasx_xvadd_w(v, headroom);
>>> + WRITE_YUV2RGB(y_l, y_l, u, v, 0, 1, 0, 0);
>>> + WRITE_YUV2RGB(y_l, y_l, u, v, 2, 3, 1, 1);
>>> + WRITE_YUV2RGB(y_h, y_h, u, v, 0, 1, 2, 2);
>>> + WRITE_YUV2RGB(y_h, y_h, u, v, 2, 3, 3, 3);
>>> + WRITE_YUV2RGB(y_l, y_l, u, v, 4, 5, 4, 4);
>>> + WRITE_YUV2RGB(y_l, y_l, u, v, 6, 7, 5, 5);
>>> + WRITE_YUV2RGB(y_h, y_h, u, v, 4, 5, 6, 6);
>>> + WRITE_YUV2RGB(y_h, y_h, u, v, 6, 7, 7, 7);
>>> + }
>>> + if (dstW - i >= 8) {
>>> + int Y1, Y2, U, V;
>>> + int i_dex = i << 1;
>>> + __m256i y0_l, y0, u0, v0;
>>> + __m256i y1_l, y1, u1, v1;
>>> + __m256i y_l, u, v;
>>> +
>>> + y0 = __lasx_xvldx(buf0, i_dex);
>> 
>> 1. Not long ago, I tried to constify the src pointer of several asm
>> functions and noticed that they produced new warnings for loongarch
>> (according to patchwork:
>> https://patchwork.ffmpeg.org/project/ffmpeg/patch/db6pr0101mb2214178d3e6b8dca5b86f8198f...@db6pr0101mb2214.eurprd01.prod.exchangelabs.com/),
>> even though I was sure that the code is const-correct. After finding
>> (via https://github.com/opencv/opencv/pull/21833) a toolchain
>> (https://gitee.com/wenux/cross-compiler-la-on-x86) that can build the
>> lasx and lsx code (upstream GCC seems to be lacking lsx and lasx support
>> at the moment; at least, my self-compiled loongarch-GCC did not support
>> lsx and lasx) the issue was clear: lsxintrin.h and lasxintrin.h do not
>> use const at all, even for functions that only read data (I presume the
>> vl in __lsx_vldx stands for "vector load"?).
>> So I sent another iteration
>> https://ffmpeg.org/pipermail/ffmpeg-devel/2022-August/299562.html of
>> that patchset that now added wrappers for __lsx_vldx() and
>> __lasx_xvldx() and cc'ed you and some other developers from loongson to
>> alert you of the issue in the hope that you fix the headers, so that my
>> wrappers wouldn't need to be applied. That didn't work, as my mails
>> could not be delivered to you. So I applied the patchset.
>> 2. You use __lasx_xvldx() to read from a const int16_t. This will give
>> new warnings unless the above issue has been fixed. Has it?
>> 3. I don't know whether it has, as patchwork's fate tests don't work for
>> a few days already. Given that the mails I receive 

Re: [FFmpeg-devel] [PATCH] swsresample/swresample: abort on invalid layouts

2022-09-08 Thread James Almer

On 9/8/2022 7:47 PM, Andreas Rheinhardt wrote:

James Almer:

If it's unsupported or invalid, then there's no point trying to rebuild it
using a value that may have been derived from the same layout to begin with.

Move the checks before the attempts at copying the layout while at it.

Fixes ticket #9908.

Signed-off-by: James Almer 
---
  libswresample/swresample.c | 48 +-
  1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/libswresample/swresample.c b/libswresample/swresample.c
index 6f04d130d3..5884f8d533 100644
--- a/libswresample/swresample.c
+++ b/libswresample/swresample.c
@@ -227,7 +227,7 @@ av_cold int swr_init(struct SwrContext *s){
  s->in_ch_layout.order   = AV_CHANNEL_ORDER_UNSPEC;
  s->in_ch_layout.nb_channels = s->user_in_ch_count;
  }
-} else
+} else if (av_channel_layout_check(>user_in_chlayout))
  av_channel_layout_copy(>in_ch_layout, >user_in_chlayout);
  
  if ((s->user_out_ch_count && s->user_out_ch_count != s->user_out_chlayout.nb_channels) ||

@@ -240,17 +240,45 @@ av_cold int swr_init(struct SwrContext *s){
  s->out_ch_layout.order   = AV_CHANNEL_ORDER_UNSPEC;
  s->out_ch_layout.nb_channels = s->user_out_ch_count;
  }
-} else
+} else if (av_channel_layout_check(>user_out_chlayout))
  av_channel_layout_copy(>out_ch_layout, >user_out_chlayout);
  
  if (!s->out.ch_count && !s->user_out_ch_layout)

  s->out.ch_count  = s->out_ch_layout.nb_channels;
  if (!s-> in.ch_count && !s-> user_in_ch_layout)
  s-> in.ch_count  = s->in_ch_layout.nb_channels;
+
+if (!(ret = av_channel_layout_check(>in_ch_layout)) || 
s->in_ch_layout.nb_channels > SWR_CH_MAX) {
+if (ret)
+av_channel_layout_describe(>in_ch_layout, l1, sizeof(l1));
+av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or 
unsupported.\n", ret ? l1 : "");
+return AVERROR(EINVAL);
+}
+
+if (!(ret = av_channel_layout_check(>out_ch_layout)) || 
s->out_ch_layout.nb_channels > SWR_CH_MAX) {
+if (ret)
+av_channel_layout_describe(>out_ch_layout, l2, sizeof(l2));
+av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid or 
unsupported.\n", ret ? l2 : "");
+return AVERROR(EINVAL);
+}
  #else
  s->out.ch_count  = s-> user_out_chlayout.nb_channels;
  s-> in.ch_count  = s->  user_in_chlayout.nb_channels;
  
+if (!(ret = av_channel_layout_check(>user_in_chlayout)) || s->user_in_chlayout.nb_channels > SWR_CH_MAX) {

+if (ret)
+av_channel_layout_describe(>user_in_chlayout, l1, sizeof(l1));
+av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or 
unsupported.\n", ret ? l1 : "");
+return AVERROR(EINVAL);
+}
+
+if (!(ret = av_channel_layout_check(>user_out_chlayout)) || 
s->user_out_chlayout.nb_channels > SWR_CH_MAX) {
+if (ret)
+av_channel_layout_describe(>user_out_chlayout, l2, sizeof(l2));
+av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid or 
unsupported.\n", ret ? l2 : "");


Why are you using  AV_LOG_WARNING when you are erroring out?


+return AVERROR(EINVAL);
+}
+
  ret  = av_channel_layout_copy(>in_ch_layout, >user_in_chlayout);
  ret |= av_channel_layout_copy(>out_ch_layout, >user_out_chlayout);
  if (ret < 0)
@@ -261,18 +289,6 @@ av_cold int swr_init(struct SwrContext *s){
  
  s->dither.method = s->user_dither_method;
  
-if (!av_channel_layout_check(>in_ch_layout) || s->in_ch_layout.nb_channels > SWR_CH_MAX) {

-av_channel_layout_describe(>in_ch_layout, l1, sizeof(l1));
-av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or 
unsupported.\n", l1);
-av_channel_layout_uninit(>in_ch_layout);
-}
-
-if (!av_channel_layout_check(>out_ch_layout) || 
s->out_ch_layout.nb_channels > SWR_CH_MAX) {
-av_channel_layout_describe(>out_ch_layout, l2, sizeof(l2));
-av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid or 
unsupported.\n", l2);
-av_channel_layout_uninit(>out_ch_layout);
-}
-
  switch(s->engine){
  #if CONFIG_LIBSOXR
  case SWR_ENGINE_SOXR: s->resampler = _soxr_resampler; break;
@@ -291,9 +307,9 @@ av_cold int swr_init(struct SwrContext *s){
  av_channel_layout_uninit(>in_ch_layout);
  }
  
-if (!s->in_ch_layout.nb_channels || s->in_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)

+if (s->in_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
  av_channel_layout_default(>in_ch_layout, s->used_ch_count);
-if (!s->out_ch_layout.nb_channels || s->out_ch_layout.order == 
AV_CHANNEL_ORDER_UNSPEC)
+if (s->out_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
  av_channel_layout_default(>out_ch_layout, s->out.ch_count);
  
  s->rematrix = av_channel_layout_compare(>out_ch_layout, 

Re: [FFmpeg-devel] [PATCH] swsresample/swresample: abort on invalid layouts

2022-09-08 Thread Andreas Rheinhardt
James Almer:
> If it's unsupported or invalid, then there's no point trying to rebuild it
> using a value that may have been derived from the same layout to begin with.
> 
> Move the checks before the attempts at copying the layout while at it.
> 
> Fixes ticket #9908.
> 
> Signed-off-by: James Almer 
> ---
>  libswresample/swresample.c | 48 +-
>  1 file changed, 32 insertions(+), 16 deletions(-)
> 
> diff --git a/libswresample/swresample.c b/libswresample/swresample.c
> index 6f04d130d3..5884f8d533 100644
> --- a/libswresample/swresample.c
> +++ b/libswresample/swresample.c
> @@ -227,7 +227,7 @@ av_cold int swr_init(struct SwrContext *s){
>  s->in_ch_layout.order   = AV_CHANNEL_ORDER_UNSPEC;
>  s->in_ch_layout.nb_channels = s->user_in_ch_count;
>  }
> -} else
> +} else if (av_channel_layout_check(>user_in_chlayout))
>  av_channel_layout_copy(>in_ch_layout, >user_in_chlayout);
>  
>  if ((s->user_out_ch_count && s->user_out_ch_count != 
> s->user_out_chlayout.nb_channels) ||
> @@ -240,17 +240,45 @@ av_cold int swr_init(struct SwrContext *s){
>  s->out_ch_layout.order   = AV_CHANNEL_ORDER_UNSPEC;
>  s->out_ch_layout.nb_channels = s->user_out_ch_count;
>  }
> -} else
> +} else if (av_channel_layout_check(>user_out_chlayout))
>  av_channel_layout_copy(>out_ch_layout, >user_out_chlayout);
>  
>  if (!s->out.ch_count && !s->user_out_ch_layout)
>  s->out.ch_count  = s->out_ch_layout.nb_channels;
>  if (!s-> in.ch_count && !s-> user_in_ch_layout)
>  s-> in.ch_count  = s->in_ch_layout.nb_channels;
> +
> +if (!(ret = av_channel_layout_check(>in_ch_layout)) || 
> s->in_ch_layout.nb_channels > SWR_CH_MAX) {
> +if (ret)
> +av_channel_layout_describe(>in_ch_layout, l1, sizeof(l1));
> +av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or 
> unsupported.\n", ret ? l1 : "");
> +return AVERROR(EINVAL);
> +}
> +
> +if (!(ret = av_channel_layout_check(>out_ch_layout)) || 
> s->out_ch_layout.nb_channels > SWR_CH_MAX) {
> +if (ret)
> +av_channel_layout_describe(>out_ch_layout, l2, sizeof(l2));
> +av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid 
> or unsupported.\n", ret ? l2 : "");
> +return AVERROR(EINVAL);
> +}
>  #else
>  s->out.ch_count  = s-> user_out_chlayout.nb_channels;
>  s-> in.ch_count  = s->  user_in_chlayout.nb_channels;
>  
> +if (!(ret = av_channel_layout_check(>user_in_chlayout)) || 
> s->user_in_chlayout.nb_channels > SWR_CH_MAX) {
> +if (ret)
> +av_channel_layout_describe(>user_in_chlayout, l1, sizeof(l1));
> +av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or 
> unsupported.\n", ret ? l1 : "");
> +return AVERROR(EINVAL);
> +}
> +
> +if (!(ret = av_channel_layout_check(>user_out_chlayout)) || 
> s->user_out_chlayout.nb_channels > SWR_CH_MAX) {
> +if (ret)
> +av_channel_layout_describe(>user_out_chlayout, l2, 
> sizeof(l2));
> +av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid 
> or unsupported.\n", ret ? l2 : "");

Why are you using  AV_LOG_WARNING when you are erroring out?

> +return AVERROR(EINVAL);
> +}
> +
>  ret  = av_channel_layout_copy(>in_ch_layout, >user_in_chlayout);
>  ret |= av_channel_layout_copy(>out_ch_layout, >user_out_chlayout);
>  if (ret < 0)
> @@ -261,18 +289,6 @@ av_cold int swr_init(struct SwrContext *s){
>  
>  s->dither.method = s->user_dither_method;
>  
> -if (!av_channel_layout_check(>in_ch_layout) || 
> s->in_ch_layout.nb_channels > SWR_CH_MAX) {
> -av_channel_layout_describe(>in_ch_layout, l1, sizeof(l1));
> -av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or 
> unsupported.\n", l1);
> -av_channel_layout_uninit(>in_ch_layout);
> -}
> -
> -if (!av_channel_layout_check(>out_ch_layout) || 
> s->out_ch_layout.nb_channels > SWR_CH_MAX) {
> -av_channel_layout_describe(>out_ch_layout, l2, sizeof(l2));
> -av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid 
> or unsupported.\n", l2);
> -av_channel_layout_uninit(>out_ch_layout);
> -}
> -
>  switch(s->engine){
>  #if CONFIG_LIBSOXR
>  case SWR_ENGINE_SOXR: s->resampler = _soxr_resampler; break;
> @@ -291,9 +307,9 @@ av_cold int swr_init(struct SwrContext *s){
>  av_channel_layout_uninit(>in_ch_layout);
>  }
>  
> -if (!s->in_ch_layout.nb_channels || s->in_ch_layout.order == 
> AV_CHANNEL_ORDER_UNSPEC)
> +if (s->in_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
>  av_channel_layout_default(>in_ch_layout, s->used_ch_count);
> -if (!s->out_ch_layout.nb_channels || s->out_ch_layout.order == 
> AV_CHANNEL_ORDER_UNSPEC)
> +if (s->out_ch_layout.order == 

[FFmpeg-devel] [PATCH 2/2] libavformat/hls: Free keys

2022-09-08 Thread Michael Niedermayer
Fixes: memleak
Fixes: 
50703/clusterfuzz-testcase-minimized-ffmpeg_dem_HLS_fuzzer-6399058578636800

Found-by: continuous fuzzing process 
https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
Signed-off-by: Michael Niedermayer 
---
 libavformat/hls.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/hls.c b/libavformat/hls.c
index 3dc7bd39309..e622425e801 100644
--- a/libavformat/hls.c
+++ b/libavformat/hls.c
@@ -250,6 +250,7 @@ static void free_init_section_list(struct playlist *pls)
 {
 int i;
 for (i = 0; i < pls->n_init_sections; i++) {
+av_freep(&pls->init_sections[i]->key);
 av_freep(&pls->init_sections[i]->url);
 av_freep(&pls->init_sections[i]);
 }
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 1/2] tools/target_dec_fuzzer: Adjust threshold for UTVIDEO

2022-09-08 Thread Michael Niedermayer
Fixes: Timeout
Fixes: 
47969/clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_UTVIDEO_fuzzer-5097256832860160

Found-by: continuous fuzzing process 
https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
Signed-off-by: Michael Niedermayer 
---
 tools/target_dec_fuzzer.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/target_dec_fuzzer.c b/tools/target_dec_fuzzer.c
index aa3ba0e5239..5b335d3130c 100644
--- a/tools/target_dec_fuzzer.c
+++ b/tools/target_dec_fuzzer.c
@@ -281,6 +281,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t 
size) {
 case AV_CODEC_ID_TQI: maxpixels  /= 1024;  break;
 case AV_CODEC_ID_TRUEMOTION2: maxpixels  /= 1024;  break;
 case AV_CODEC_ID_TSCC:maxpixels  /= 1024;  break;
+case AV_CODEC_ID_UTVIDEO: maxpixels  /= 1024;  break;
 case AV_CODEC_ID_VB:  maxpixels  /= 1024;  break;
 case AV_CODEC_ID_VC1: maxpixels  /= 8192;  break;
 case AV_CODEC_ID_VC1IMAGE:maxpixels  /= 8192;  break;
-- 
2.17.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] swsresample/swresample: abort on invalid layouts

2022-09-08 Thread James Almer
If it's unsupported or invalid, then there's no point trying to rebuild it
using a value that may have been derived from the same layout to begin with.

Move the checks before the attempts at copying the layout while at it.

Fixes ticket #9908.

Signed-off-by: James Almer 
---
 libswresample/swresample.c | 48 +-
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/libswresample/swresample.c b/libswresample/swresample.c
index 6f04d130d3..5884f8d533 100644
--- a/libswresample/swresample.c
+++ b/libswresample/swresample.c
@@ -227,7 +227,7 @@ av_cold int swr_init(struct SwrContext *s){
 s->in_ch_layout.order   = AV_CHANNEL_ORDER_UNSPEC;
 s->in_ch_layout.nb_channels = s->user_in_ch_count;
 }
-} else
+} else if (av_channel_layout_check(&s->user_in_chlayout))
 av_channel_layout_copy(&s->in_ch_layout, &s->user_in_chlayout);
 
 if ((s->user_out_ch_count && s->user_out_ch_count != 
s->user_out_chlayout.nb_channels) ||
@@ -240,17 +240,45 @@ av_cold int swr_init(struct SwrContext *s){
 s->out_ch_layout.order   = AV_CHANNEL_ORDER_UNSPEC;
 s->out_ch_layout.nb_channels = s->user_out_ch_count;
 }
-} else
+} else if (av_channel_layout_check(&s->user_out_chlayout))
 av_channel_layout_copy(&s->out_ch_layout, &s->user_out_chlayout);
 
 if (!s->out.ch_count && !s->user_out_ch_layout)
 s->out.ch_count  = s->out_ch_layout.nb_channels;
 if (!s-> in.ch_count && !s-> user_in_ch_layout)
 s-> in.ch_count  = s->in_ch_layout.nb_channels;
+
+if (!(ret = av_channel_layout_check(&s->in_ch_layout)) || 
s->in_ch_layout.nb_channels > SWR_CH_MAX) {
+if (ret)
+av_channel_layout_describe(&s->in_ch_layout, l1, sizeof(l1));
+av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or 
unsupported.\n", ret ? l1 : "");
+return AVERROR(EINVAL);
+}
+
+if (!(ret = av_channel_layout_check(&s->out_ch_layout)) || 
s->out_ch_layout.nb_channels > SWR_CH_MAX) {
+if (ret)
+av_channel_layout_describe(&s->out_ch_layout, l2, sizeof(l2));
+av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid or 
unsupported.\n", ret ? l2 : "");
+return AVERROR(EINVAL);
+}
 #else
 s->out.ch_count  = s-> user_out_chlayout.nb_channels;
 s-> in.ch_count  = s->  user_in_chlayout.nb_channels;
 
+if (!(ret = av_channel_layout_check(&s->user_in_chlayout)) || 
s->user_in_chlayout.nb_channels > SWR_CH_MAX) {
+if (ret)
+av_channel_layout_describe(&s->user_in_chlayout, l1, sizeof(l1));
+av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or 
unsupported.\n", ret ? l1 : "");
+return AVERROR(EINVAL);
+}
+
+if (!(ret = av_channel_layout_check(&s->user_out_chlayout)) || 
s->user_out_chlayout.nb_channels > SWR_CH_MAX) {
+if (ret)
+av_channel_layout_describe(&s->user_out_chlayout, l2, sizeof(l2));
+av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid or 
unsupported.\n", ret ? l2 : "");
+return AVERROR(EINVAL);
+}
+
 ret  = av_channel_layout_copy(&s->in_ch_layout, &s->user_in_chlayout);
 ret |= av_channel_layout_copy(&s->out_ch_layout, &s->user_out_chlayout);
 if (ret < 0)
@@ -261,18 +289,6 @@ av_cold int swr_init(struct SwrContext *s){
 
 s->dither.method = s->user_dither_method;
 
-if (!av_channel_layout_check(&s->in_ch_layout) || 
s->in_ch_layout.nb_channels > SWR_CH_MAX) {
-av_channel_layout_describe(&s->in_ch_layout, l1, sizeof(l1));
-av_log(s, AV_LOG_WARNING, "Input channel layout \"%s\" is invalid or 
unsupported.\n", l1);
-av_channel_layout_uninit(&s->in_ch_layout);
-}
-
-if (!av_channel_layout_check(&s->out_ch_layout) || 
s->out_ch_layout.nb_channels > SWR_CH_MAX) {
-av_channel_layout_describe(&s->out_ch_layout, l2, sizeof(l2));
-av_log(s, AV_LOG_WARNING, "Output channel layout \"%s\" is invalid or 
unsupported.\n", l2);
-av_channel_layout_uninit(&s->out_ch_layout);
-}
-
 switch(s->engine){
 #if CONFIG_LIBSOXR
 case SWR_ENGINE_SOXR: s->resampler = &swri_soxr_resampler; break;
@@ -291,9 +307,9 @@ av_cold int swr_init(struct SwrContext *s){
 av_channel_layout_uninit(&s->in_ch_layout);
 }
 
-if (!s->in_ch_layout.nb_channels || s->in_ch_layout.order == 
AV_CHANNEL_ORDER_UNSPEC)
+if (s->in_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
 av_channel_layout_default(&s->in_ch_layout, s->used_ch_count);
-if (!s->out_ch_layout.nb_channels || s->out_ch_layout.order == 
AV_CHANNEL_ORDER_UNSPEC)
+if (s->out_ch_layout.order == AV_CHANNEL_ORDER_UNSPEC)
 av_channel_layout_default(&s->out_ch_layout, s->out.ch_count);
 
 s->rematrix = av_channel_layout_compare(&s->out_ch_layout, 
&s->in_ch_layout) ||
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org

Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()

2022-09-08 Thread Andreas Rheinhardt
Michael Niedermayer:
> On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
>> Michael Niedermayer:
>>> Hi
>>>
>>> On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
>>>> Up until now, libswscale/input.c used a macro to read
>>>> an input pixel which involved a call to av_pix_fmt_desc_get()
>>>> to find out whether the input pixel format is BE or LE
>>>> despite this being known at compile-time (there are templates
>>>> per pixfmt). Even worse, these calls are made in a loop,
>>>> so that e.g. there are six calls to av_pix_fmt_desc_get()
>>>> for every pair of UV pixel processed in
>>>> rgb64ToUV_half_c_template().
>>>>
>>>> This commit modifies these macros to ensure that isBE()
>>>> is evaluated at compile-time. This saved 9743B of .text
>>>> for me (GCC 11.2, -O3).
>>>
>>> hmm, all these functions where supposed to be optimized out
>>> why where they not ?
>>>
>>> iam asking as the code is simpler before your patch if that
>>> "optimization out" thing would work
>>>
>>
>> Why should these functions be optimized out? What would enable the
>> compiler to optimize them out?
> 
> Going back into the past, there was
> 6b0768e2021b90215a2ab55ed427bce91d148148
> 
> before this the code certainly did get optimized out, it was just
> #define isBE(x) ((x)&1)
> 
> thats simple and clean code btw

I don't really consider such magic numbers to be clean.

> after this it became
> 
> #define isBE(x) \
> +(av_pix_fmt_descriptors[x].flags & PIX_FMT_BE)
> 
> that's still really good, and very readable; it's a const array, so
> one would assume that a compiler can figure that out at compile time
> well, I try not to think of linking and separate objects here ;)
> 
> next it got then replaced by a function and a call that i suspect
> people thought would be inlined
> 
> 
>> (And I really don't see why this patch would make the code more
>> complicated.)
> 
> the code historically was capable to lookup any flag and detail
> of a pixel format at compile time
> now your code works around that not working. Introducing a 2nd
> system to do this in parallel. 

I am not introducing a second system, I am reusing the existing system,
namely our existing naming system (the fact that we use BE/LE in the
name of BE/LE pixel formats).

> To me if i look at the evolution
> of isBE() / code checking BE-ness it become more messy over time
> 
> I think it would be interesting to think about whether we can make
> av_pix_fmt_desc_get(compile time constant) work at compile time,
> or whether we can maybe return to a simpler implementation
> 

We could put the av_pix_fmt_descriptors array into an internal header
and use something like

static av_always_inline const AVPixFmtDescriptor
*ff_pix_fmt_descriptor_get(enum AVPixelFormat fmt)
{
    if (av_builtin_constant_p(fmt))
        return &av_pix_fmt_descriptors[fmt];
    return av_pix_fmt_desc_get(fmt);
}

- Andreas
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 2/3] avcodec/fmvc: buffer size is stride based not 4*width

2022-09-08 Thread Michael Niedermayer
On Sun, Sep 04, 2022 at 11:42:49PM +0200, Michael Niedermayer wrote:
> On Fri, Sep 02, 2022 at 06:48:57PM +0200, Paul B Mahol wrote:
> > On Fri, Sep 2, 2022 at 6:32 PM Michael Niedermayer 
> > wrote:
> > 
> > > On Mon, Jun 13, 2022 at 09:13:19PM +0200, Michael Niedermayer wrote:
> > > > On Mon, Jun 13, 2022 at 12:10:44PM +0200, Paul B Mahol wrote:
> > > > > On Mon, Jun 13, 2022 at 11:48 AM Anton Khirnov 
> > > wrote:
> > > > >
> > > > > > Quoting Paul B Mahol (2022-06-13 11:34:44)
> > > > > > > On Mon, Jun 13, 2022 at 11:10 AM Anton Khirnov 
> > > > > > wrote:
> > > > > > >
> > > > > > > > Quoting Paul B Mahol (2022-06-13 10:04:04)
> > > > > > > > > On Sat, Jun 11, 2022 at 4:55 PM Michael Niedermayer <
> > > > > > > > mich...@niedermayer.cc>
> > > > > > > > > wrote:
> > > > > > > > >
> > > > > > > > > > On Sat, Jun 11, 2022 at 10:47:57AM +0200, Paul B Mahol 
> > > > > > > > > > wrote:
> > > > > > > > > > > Have you actually tested this "change" ?
> > > > > > > > > >
> > > > > > > > > > On every file i found
> > > > > > > > > > 6-methyl-5-hepten-2-one-CC-db_small.avi
> > > > > > > > > > fmvcVirtualDub_small.avi
> > > > > > > > > > skrzyzowanie4.avi
> > > > > > > > > > fmvc-poc.avi
> > > > > > > > > >
> > > > > > > > > > are there any other files i should test it on ?
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > Yes, the ones where stride != width.
> > > > > > > >
> > > > > > > > Give examples of such files then. And add more tests.
> > > > > > > >
> > > > > > > > You really should try to be more helpful if you care about this
> > > code
> > > > > > > > working.
> > > > > > >
> > > > > > >
> > > > > > > Code works perfectly from start. There are always attempts to
> > > break it.
> > > > > > > Your attempts to belittle my work are futile.
> > > > > >
> > > > > > Perfect code should live in an external repository that is locked
> > > > > > against modification.
> > > > > >
> > > > > > The ffmpeg repository is only for imperfect code that evolves with
> > > time,
> > > > > > and so requires changes.
> > > > > >
> > > > > >
> > > > > I dunno what Michael attempts to fix. Decoder works fine with valid
> > > files.
> > > > > I doubt that encoder would encode random bytes or padding into valid
> > > file
> > > > > bitstream.
> > > >
> > > > the stride*4 / width*4 change was because of 2 things.
> > > > first with AV_PIX_FMT_BGR24 the data stored is not width*4
> > > >
> > > > stride is in units of 4 bytes for some reason, so stride*4
> > > > fixes this
> > > > The 2nd issue is that the code addresses it by "s->stride * 4"
> > > > so the buffer allocation should be stride*4 if we belive the
> > > > other code is correct
> > > >
> > > > src = s->buffer;
> > > > ...
> > > > for (y = 0; y < avctx->height; y++) {
> > > > ...
> > > > src += s->stride * 4;
> > > >
> > > > width*4 works because its bigger than stride*4 for BGR24 which is what
> > > all
> > > > samples i have use.
> > > >
> > > > also
> > > > ssrc = s->buffer;
> > > > ...
> > > > for (y = 0; y < avctx->height; y++) {
> > > > ...
> > > > ssrc += s->stride * 4;
> > > > and
> > > > dst = (uint32_t *)s->buffer;
> > > >
> > > > for (block = 0, y = 0; y < s->yb; y++) {
> > > > int block_h = s->blocks[block].h;
> > > > uint32_t *rect = dst;
> > > >
> > > > for (x = 0; x < s->xb; x++) {
> > > > int block_w = s->blocks[block].w;
> > > > uint32_t *row = dst;
> > > >
> > > > block_h = s->blocks[block].h;
> > > > if (s->blocks[block].xor) {
> > > > for (k = 0; k < block_h; k++) {
> > > > uint32_t *column = dst;
> > > > for (l = 0; l < block_w; l++)
> > > > *dst++ ^= *src++;
> > > > dst = &column[s->stride];
> > > > }
> > > > }
> > > > dst = &row[block_w];
> > > > ++block;
> > > > }
> > > > dst = &rect[block_h * s->stride];
> > > > }
> > > >
> > > > Again, if you have fmvc files with more odd widths or other pixel 
> > > > formats
> > > > these would be very welcome. I can just say the code as is in git is
> > > wrong
> > > > and the buffer size as is in git is wrong. I noticed this when i added
> > > > a check to see if the buffer is only partly filled and realized its
> > > > always partly filled even when the whole image is actually touched
> > >
> > > If there are no objections aka noone sees a bug in this then id like
> > > to apply this
> > >
> > 
> > Since when are partially filled buffers are bad thing?
> 
> - waste of memory
> - breaks subsequent patch
> - width and stride relate this way: 
>   s->stride = (avctx->width * avctx->bits_per_coded_sample + 31) / 32;
>   is width always bigger or equal ?
>   If not we might be accessing outside the array because 

Re: [FFmpeg-devel] [PATCH 1/3] avcodec/fmvc: Move frame allocation to a later stage

2022-09-08 Thread Michael Niedermayer
On Sat, Jun 11, 2022 at 01:10:43AM +0200, Michael Niedermayer wrote:
> This way more things are checked before allocation
> 
> Signed-off-by: Michael Niedermayer 
> ---
>  libavcodec/fmvc.c | 21 +++--
>  1 file changed, 15 insertions(+), 6 deletions(-)

will apply

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Frequently ignored answer#1 FFmpeg bugs should be sent to our bugtracker. User
questions about the command line tools should be sent to the ffmpeg-user ML.
And questions about how to use libav* should be sent to the libav-user ML.


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2] avcodec/jpeg2000: Add support for High-Throughput JPEG 2000 (HTJ2K) decoding.

2022-09-08 Thread etemesicaleb
From: caleb 

Rebased this patch on master branch
---
 libavcodec/Makefile|2 +-
 libavcodec/j2kenc.c|   26 +-
 libavcodec/jpeg2000.h  |  103 ++-
 libavcodec/jpeg2000dec.c   |  193 ++
 libavcodec/jpeg2000htdec.c | 1212 
 libavcodec/jpeg2000htdec.h |  210 +++
 6 files changed, 1599 insertions(+), 147 deletions(-)
 create mode 100644 libavcodec/jpeg2000htdec.c
 create mode 100644 libavcodec/jpeg2000htdec.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 945908e3b8..ecf5c47cad 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -450,7 +450,7 @@ OBJS-$(CONFIG_JACOSUB_DECODER) += jacosubdec.o ass.o
 OBJS-$(CONFIG_JPEG2000_ENCODER)+= j2kenc.o mqcenc.o mqc.o jpeg2000.o \
   jpeg2000dwt.o
 OBJS-$(CONFIG_JPEG2000_DECODER)+= jpeg2000dec.o jpeg2000.o 
jpeg2000dsp.o \
-  jpeg2000dwt.o mqcdec.o mqc.o
+  jpeg2000dwt.o mqcdec.o mqc.o 
jpeg2000htdec.o
 OBJS-$(CONFIG_JPEGLS_DECODER)  += jpeglsdec.o jpegls.o
 OBJS-$(CONFIG_JPEGLS_ENCODER)  += jpeglsenc.o jpegls.o
 OBJS-$(CONFIG_JV_DECODER)  += jvdec.o
diff --git a/libavcodec/j2kenc.c b/libavcodec/j2kenc.c
index e883d5deb7..233d75e96d 100644
--- a/libavcodec/j2kenc.c
+++ b/libavcodec/j2kenc.c
@@ -106,7 +106,7 @@ static const int dwt_norms[2][4][10] = { // 
[dwt_type][band][rlevel] (multiplied
 typedef struct {
Jpeg2000Component *comp;
double *layer_rates;
-} Jpeg2000Tile;
+} Jpeg2000EncTile;
 
 typedef struct {
 AVClass *class;
@@ -131,7 +131,7 @@ typedef struct {
 Jpeg2000CodingStyle codsty;
 Jpeg2000QuantStyle  qntsty;
 
-Jpeg2000Tile *tile;
+Jpeg2000EncTile *tile;
 int layer_rates[100];
 uint8_t compression_rate_enc; ///< Is compression done using compression 
ratio?
 
@@ -427,7 +427,7 @@ static void compute_rates(Jpeg2000EncoderContext* s)
 int layno, compno;
 for (i = 0; i < s->numYtiles; i++) {
 for (j = 0; j < s->numXtiles; j++) {
-Jpeg2000Tile *tile = &s->tile[s->numXtiles * i + j];
+Jpeg2000EncTile *tile = &s->tile[s->numXtiles * i + j];
 for (compno = 0; compno < s->ncomponents; compno++) {
 int tilew = tile->comp[compno].coord[0][1] - 
tile->comp[compno].coord[0][0];
 int tileh = tile->comp[compno].coord[1][1] - 
tile->comp[compno].coord[1][0];
@@ -460,12 +460,12 @@ static int init_tiles(Jpeg2000EncoderContext *s)
 s->numXtiles = ff_jpeg2000_ceildiv(s->width, s->tile_width);
 s->numYtiles = ff_jpeg2000_ceildiv(s->height, s->tile_height);
 
-s->tile = av_calloc(s->numXtiles, s->numYtiles * sizeof(Jpeg2000Tile));
+s->tile = av_calloc(s->numXtiles, s->numYtiles * sizeof(Jpeg2000EncTile));
 if (!s->tile)
 return AVERROR(ENOMEM);
 for (tileno = 0, tiley = 0; tiley < s->numYtiles; tiley++)
 for (tilex = 0; tilex < s->numXtiles; tilex++, tileno++){
-Jpeg2000Tile *tile = s->tile + tileno;
+Jpeg2000EncTile *tile = s->tile + tileno;
 
 tile->comp = av_calloc(s->ncomponents, sizeof(*tile->comp));
 if (!tile->comp)
@@ -509,7 +509,7 @@ static int init_tiles(Jpeg2000EncoderContext *s)
 int tileno, compno, i, y, x;   
 \
 const PIXEL *line; 
 \
 for (tileno = 0; tileno < s->numXtiles * s->numYtiles; tileno++){  
 \
-Jpeg2000Tile *tile = s->tile + tileno; 
 \
+Jpeg2000EncTile *tile = s->tile + tileno;  
\
 if (s->planar){
 \
 for (compno = 0; compno < s->ncomponents; compno++){   
 \
 Jpeg2000Component *comp = tile->comp + compno; 
 \
@@ -701,7 +701,7 @@ static void encode_clnpass(Jpeg2000T1Context *t1, int 
width, int height, int ban
 }
 }
 
-static void encode_cblk(Jpeg2000EncoderContext *s, Jpeg2000T1Context *t1, 
Jpeg2000Cblk *cblk, Jpeg2000Tile *tile,
+static void encode_cblk(Jpeg2000EncoderContext *s, Jpeg2000T1Context *t1, 
Jpeg2000Cblk *cblk, Jpeg2000EncTile *tile,
 int width, int height, int bandpos, int lev)
 {
 int pass_t = 2, passno, x, y, max=0, nmsedec, bpno;
@@ -935,7 +935,7 @@ static int encode_packet(Jpeg2000EncoderContext *s, 
Jpeg2000ResLevel *rlevel, in
 return 0;
 }
 
-static int 

Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()

2022-09-08 Thread Michael Niedermayer
On Thu, Sep 08, 2022 at 09:38:51PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > Hi
> > 
> > On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
> >> Up until now, libswscale/input.c used a macro to read
> >> an input pixel which involved a call to av_pix_fmt_desc_get()
> >> to find out whether the input pixel format is BE or LE
> >> despite this being known at compile-time (there are templates
> >> per pixfmt). Even worse, these calls are made in a loop,
> >> so that e.g. there are six calls to av_pix_fmt_desc_get()
> >> for every pair of UV pixel processed in
> >> rgb64ToUV_half_c_template().
> >>
> >> This commit modifies these macros to ensure that isBE()
> >> is evaluated at compile-time. This saved 9743B of .text
> >> for me (GCC 11.2, -O3).
> > 
> > hmm, all these functions where supposed to be optimized out
> > why where they not ?
> > 
> > iam asking as the code is simpler before your patch if that
> > "optimization out" thing would work
> > 
> 
> Why should these functions be optimized out? What would enable the
> compiler to optimize them out?

Going back into the past, there was
6b0768e2021b90215a2ab55ed427bce91d148148

before this the code certainly did get optimized out, it was just
#define isBE(x) ((x)&1)

thats simple and clean code btw
after this it became

#define isBE(x) \
+(av_pix_fmt_descriptors[x].flags & PIX_FMT_BE)

that's still really good, and very readable; it's a const array, so
one would assume that a compiler can figure that out at compile time
well, I try not to think of linking and separate objects here ;)

next it got then replaced by a function and a call that i suspect
people thought would be inlined


> (And I really don't see why this patch would make the code more
> complicated.)

the code historically was capable to lookup any flag and detail
of a pixel format at compile time
now your code works around that not working. Introducing a 2nd
system to do this in parallel. To me if i look at the evolution
of isBE() / code checking BE-ness it become more messy over time

I think it would be interesting to think about whether we can make
av_pix_fmt_desc_get(compile time constant) work at compile time,
or whether we can maybe return to a simpler implementation


thx

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Awnsering whenever a program halts or runs forever is
On a turing machine, in general impossible (turings halting problem).
On any real computer, always possible as a real computer has a finite number
of states N, and will either halt in less than N cycles or never halt.


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 1/1] fate/opus: add silk LBRR test (refs #9890)

2022-09-08 Thread Tristan Matthews
On Thu, Sep 8, 2022 at 3:58 PM Tristan Matthews  wrote:

> This adds a fate test for a sample with LBRR packets.
>
> It requires that these files be uploaded:
> https://people.videolan.org/~tmatth/9890-fate/silk-lbrr.mka
> https://people.videolan.org/~tmatth/9890-fate/silk-lbrr.dec
>
> ---
>  tests/fate/opus.mak | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/tests/fate/opus.mak b/tests/fate/opus.mak
> index 573044ed15..7d359f414a 100644
> --- a/tests/fate/opus.mak
> +++ b/tests/fate/opus.mak
> @@ -4,7 +4,7 @@
>
>  OPUS_CELT_SAMPLES   = $(addprefix testvector, 01 11) tron.6ch.tinypkts
>  OPUS_HYBRID_SAMPLES = $(addprefix testvector, 05 06)
> -OPUS_SILK_SAMPLES   = $(addprefix testvector, 02 03 04)
> +OPUS_SILK_SAMPLES   = $(addprefix testvector, 02 03 04) silk-lbrr
>  OPUS_OTHER_SAMPLES  = $(addprefix testvector, 07 08 09 10 12)
>
>  define FATE_OPUS_TEST
> @@ -33,6 +33,7 @@ fate-opus-testvector09:  CMP_TARGET = 0
>  fate-opus-testvector10:  CMP_TARGET = 38
>  fate-opus-testvector11:  CMP_TARGET = 0
>  fate-opus-testvector12:  CMP_TARGET = 160
> +fate-opus-silk-lbrr: CMP_TARGET = 0
>  fate-opus-tron.6ch.tinypkts: CMP_SHIFT = 1440
>  fate-opus-tron.6ch.tinypkts: CMP_TARGET = 0
>
> --
> 2.34.1
>


Sorry this was supposed to be in response to
http://ffmpeg.org/pipermail/ffmpeg-devel/2022-August/300758.html (as it
depends on it) but I screwed up the git-send-email.

Best,
-t
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 1/1] fate/opus: add silk LBRR test (refs #9890)

2022-09-08 Thread Tristan Matthews
This adds a fate test for a sample with LBRR packets.

It requires that these files be uploaded:
https://people.videolan.org/~tmatth/9890-fate/silk-lbrr.mka
https://people.videolan.org/~tmatth/9890-fate/silk-lbrr.dec

---
 tests/fate/opus.mak | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/fate/opus.mak b/tests/fate/opus.mak
index 573044ed15..7d359f414a 100644
--- a/tests/fate/opus.mak
+++ b/tests/fate/opus.mak
@@ -4,7 +4,7 @@
 
 OPUS_CELT_SAMPLES   = $(addprefix testvector, 01 11) tron.6ch.tinypkts
 OPUS_HYBRID_SAMPLES = $(addprefix testvector, 05 06)
-OPUS_SILK_SAMPLES   = $(addprefix testvector, 02 03 04)
+OPUS_SILK_SAMPLES   = $(addprefix testvector, 02 03 04) silk-lbrr
 OPUS_OTHER_SAMPLES  = $(addprefix testvector, 07 08 09 10 12)
 
 define FATE_OPUS_TEST
@@ -33,6 +33,7 @@ fate-opus-testvector09:  CMP_TARGET = 0
 fate-opus-testvector10:  CMP_TARGET = 38
 fate-opus-testvector11:  CMP_TARGET = 0
 fate-opus-testvector12:  CMP_TARGET = 160
+fate-opus-silk-lbrr: CMP_TARGET = 0
 fate-opus-tron.6ch.tinypkts: CMP_SHIFT = 1440
 fate-opus-tron.6ch.tinypkts: CMP_TARGET = 0
 
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()

2022-09-08 Thread Andreas Rheinhardt
Michael Niedermayer:
> Hi
> 
> On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
>> Up until now, libswscale/input.c used a macro to read
>> an input pixel which involved a call to av_pix_fmt_desc_get()
>> to find out whether the input pixel format is BE or LE
>> despite this being known at compile-time (there are templates
>> per pixfmt). Even worse, these calls are made in a loop,
>> so that e.g. there are six calls to av_pix_fmt_desc_get()
>> for every pair of UV pixel processed in
>> rgb64ToUV_half_c_template().
>>
>> This commit modifies these macros to ensure that isBE()
>> is evaluated at compile-time. This saved 9743B of .text
>> for me (GCC 11.2, -O3).
> 
> hmm, all these functions were supposed to be optimized out
> why were they not?
> 
> I am asking as the code is simpler before your patch if that
> "optimization out" thing would work
> 

Why should these functions be optimized out? What would enable the
compiler to optimize them out?
(And I really don't see why this patch would make the code more
complicated.)

- Andreas
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 2/2] swscale/input: Avoid calls to av_pix_fmt_desc_get()

2022-09-08 Thread Michael Niedermayer
Hi

On Thu, Sep 08, 2022 at 04:38:11AM +0200, Andreas Rheinhardt wrote:
> Up until now, libswscale/input.c used a macro to read
> an input pixel which involved a call to av_pix_fmt_desc_get()
> to find out whether the input pixel format is BE or LE
> despite this being known at compile-time (there are templates
> per pixfmt). Even worse, these calls are made in a loop,
> so that e.g. there are six calls to av_pix_fmt_desc_get()
> for every pair of UV pixel processed in
> rgb64ToUV_half_c_template().
> 
> This commit modifies these macros to ensure that isBE()
> is evaluated at compile-time. This saved 9743B of .text
> for me (GCC 11.2, -O3).

hmm, all these functions were supposed to be optimized out
why were they not?

I am asking as the code is simpler before your patch if that
"optimization out" thing would work

thx

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Everything should be made as simple as possible, but not simpler.
-- Albert Einstein


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avfilter/vf_showinfo: remove backspaces

2022-09-08 Thread Michael Niedermayer
On Thu, Jul 21, 2022 at 08:28:04PM +0200, Michael Niedermayer wrote:
> They mess with storing editing and comparing the results
> 
> Signed-off-by: Michael Niedermayer 
> ---
>  libavfilter/vf_showinfo.c | 11 +++
>  1 file changed, 7 insertions(+), 4 deletions(-)

will apply

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Frequently ignored answer#1 FFmpeg bugs should be sent to our bugtracker. User
questions about the command line tools should be sent to the ffmpeg-user ML.
And questions about how to use libav* should be sent to the libav-user ML.


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avformat/matroska: Support HDR10+ metadata in Matroska.

2022-09-08 Thread Mohammad Izadi
Michael, I appreciate it if you can take a look and give me your feedback.


On Thu, Sep 8, 2022 at 9:31 AM Michael Niedermayer 
wrote:

> On Wed, Sep 07, 2022 at 02:12:46PM +0100, Derek Buitenhuis wrote:
> > On 9/6/2022 10:47 PM, Mohammad Izadi wrote:
> > > +if (side_data && side_data_size > 0)
> > > +
> ff_write_dynamic_hdr10_plus_to_full_itu_t_t35((AVDynamicHDRPlus*)side_data,
> _plus_itu_t_t35, _plus_itu_t_t35_size);
> >
> > You can't use ff_-prefixed functions across library boundaries.
> >
> > It needs to be either public (av*) or avpriv. I suspect people won't want
> it to
> > be avpriv.
> >
> > Personally, I think having serialization as a public API is useful, but
> YMMV. Mostly
> > because I was just writing my own serialization to make use of the
> exported side data :P.
>
> I agree
>
> on a related subject, side data serialization should be moved to a common
> API
> We have common APIs for parsers, decoder, bitstream filters but for
> parsing/decoding side data this is heading toward something less structured
>
> Above is not a comment on this patch, the patch is fine. I just want to
> point
> to this before we have several dozen such functions which need to be
> deprecated and supported when a more structured system is introduced
>
> thx
>
>
> [...]
> --
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Into a blind darkness they enter who follow after the Ignorance,
> they as if into a greater darkness enter who devote themselves
> to the Knowledge alone. -- Isha Upanishad
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avformat/matroska: Support HDR10+ metadata in Matroska.

2022-09-08 Thread Michael Niedermayer
On Wed, Sep 07, 2022 at 02:12:46PM +0100, Derek Buitenhuis wrote:
> On 9/6/2022 10:47 PM, Mohammad Izadi wrote:
> > +if (side_data && side_data_size > 0)
> > +
> > ff_write_dynamic_hdr10_plus_to_full_itu_t_t35((AVDynamicHDRPlus*)side_data, 
> > _plus_itu_t_t35, _plus_itu_t_t35_size);
> 
> You can't use ff_-prefixed functions across library boundaries.
> 
> It needs to be either public (av*) or avpriv. I suspect people won't want it to
> be avpriv.
> 
> Personally, I think having serialization as a public API is useful, but YMMV. 
> Mostly
> because I was just writing my own serialization to make use of the exported 
> side data :P.

I agree

on a related subject, side data serialization should be moved to a common API
We have common APIs for parsers, decoder, bitstream filters but for 
parsing/decoding side data this is heading toward something less structured

Above is not a comment on this patch, the patch is fine. I just want to point
to this before we have several dozen such functions which need to be
deprecated and supported when a more structured system is introduced

thx


[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Into a blind darkness they enter who follow after the Ignorance,
they as if into a greater darkness enter who devote themselves
to the Knowledge alone. -- Isha Upanishad


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] lavu: header and documentation for AVWriter

2022-09-08 Thread Anton Khirnov
Quoting Nicolas George (2022-09-07 15:30:09)
> Anton Khirnov (12022-09-02):
> > As I already said to you in private, I do not think the motivation and
> > use cases for this have been sufficiently established.
> > 
> > You claim this will bring massive advantages all over the place. You
> > should support these claims with some actual patches that demonstrate
> > these advantages on some real code.
> 
> I have not claimed “massive advantages all over the place”, I have
> claimed minor advantages all over the places and massive advantages for
> future API.
> 
> With that correction stated, I have already answered you that your
> inability to see the benefits is only proof that your work on FFmpeg is
> on areas that rarely need strings. In fact, if I remember correctly, the
> last time it happened you used the worst possible implementation and we
> had to be multiple persons insisting to get it fixed.
> 
> Therefore, I consider your objection to be orders of magnitude less
> important than the positive feedback I finally had got from people who
> do use strings routinely, who in particular have used BPrint, are
> familiar with the benefits it brought but also the limitations I want to
> fix.

If ad hominem is the best argument you have then this clearly needs a
lot more work.

-- 
Anton Khirnov
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avcodec/flac_parser: fix triggered assert

2022-09-08 Thread Paul B Mahol
Patch attached.
From a726d0a26c9f60d65167a83789f9c222cfda5728 Mon Sep 17 00:00:00 2001
From: Paul B Mahol 
Date: Thu, 8 Sep 2022 09:59:09 +0200
Subject: [PATCH] avcodec/flac_parser: avoid returning too negative number

If return value is very small parser code will assert.

Signed-off-by: Paul B Mahol 
---
 libavcodec/flac_parser.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavcodec/flac_parser.c b/libavcodec/flac_parser.c
index 5b3a4e6e67..bd91cc1a05 100644
--- a/libavcodec/flac_parser.c
+++ b/libavcodec/flac_parser.c
@@ -663,8 +663,11 @@ static int get_best_header(FLACParseContext *fpc, const uint8_t **poutbuf,
 
 /* Return the negative overread index so the client can compute pos.
This should be the amount overread to the beginning of the child */
-if (child)
-return child->offset - flac_fifo_size(>fifo_buf);
+if (child) {
+int64_t offset = child->offset - flac_fifo_size(>fifo_buf);
+if (offset > -(1 << 28))
+return offset;
+}
 return 0;
 }
 
-- 
2.37.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2] avformat/imfdec: check track valid before use it

2022-09-08 Thread Pierre-Anthony Lemieux
See [1] for a patchset that is intended to address the coverity issue
and simplify error handling.

[1] 
https://patchwork.ffmpeg.org/project/ffmpeg/patch/20220907200233.21255-1-...@sandflow.com/

On Sat, Aug 27, 2022 at 10:25 AM Pierre-Anthony Lemieux
 wrote:
>
> On Sat, Aug 27, 2022 at 5:25 AM Steven Liu  wrote:
> >
> > Pierre-Anthony Lemieux  于2022年8月27日周六 00:06写道:
> > >
> > > On Fri, Aug 26, 2022 at 9:01 AM Andreas Rheinhardt
> > >  wrote:
> > > >
> > > > Pierre-Anthony Lemieux:
> > > > > On Fri, Aug 26, 2022 at 1:37 AM Andreas Rheinhardt
> > > > >  wrote:
> > > > >>
> > > > >> Steven Liu:
> > > > >>> fix CID: 1512414
> > > > >>> And return AVERROR_INVALIDDATA when 
> > > > >>> get_next_track_with_minimum_timestamp
> > > > >>> incorrect in imf_read_packet;
> > > > >>>
> > > > >>> Signed-off-by: Steven Liu 
> > > > >>> ---
> > > > >>>  libavformat/imfdec.c | 7 +--
> > > > >>>  1 file changed, 5 insertions(+), 2 deletions(-)
> > > > >>>
> > > > >>> diff --git a/libavformat/imfdec.c b/libavformat/imfdec.c
> > > > >>> index 5bbe7a53f8..08f342bc1a 100644
> > > > >>> --- a/libavformat/imfdec.c
> > > > >>> +++ b/libavformat/imfdec.c
> > > > >>> @@ -697,8 +697,9 @@ static IMFVirtualTrackPlaybackCtx 
> > > > >>> *get_next_track_with_minimum_timestamp(AVForma
> > > > >>>  }
> > > > >>>  }
> > > > >>>
> > > > >>> -av_log(s, AV_LOG_DEBUG, "Found next track to read: %d 
> > > > >>> (timestamp: %lf / %lf)\n",
> > > > >>> -   track->index, av_q2d(track->current_timestamp), 
> > > > >>> av_q2d(minimum_timestamp));
> > > > >>> +if (track)
> > > > >>> +av_log(s, AV_LOG_DEBUG, "Found next track to read: %d 
> > > > >>> (timestamp: %lf / %lf)\n",
> > > > >>> +   track->index, av_q2d(track->current_timestamp), 
> > > > >>> av_q2d(minimum_timestamp));
> > > > >>
> > > > >> Coverity actually complained about track being uninitialized, which 
> > > > >> this
> > > > >> patch does not address. And the reason it does this is that it 
> > > > >> doesn't
> > > > >> understand the algorithm: track will always be initialized in the 
> > > > >> first
> > > > >> iteration of the loop.
> > > > >
> > > > > Is it possible to tell coverity that  c->track_count > 0 is a
> > > > > pre-condition, or should we modify the loop/algorithm?
> > > > >
> > > >
> > > > The typical way to do this is to add an av_assert1 or av_assert2;
> > > > but this must only be done if it is indeed ensured that the assert will
> > > > not be triggered.
> > > >
> > > > >> (If there is a first iteration of the loop -- is
> > > > >> this actually guaranteed? A file without tracks seems to be pretty 
> > > > >> useless.)
> > > > >
> > > > > imfdec currently assumes that (a) imf_read_packet() is not called if
> > > > > there are no streams/tracks and (b) a track will always be found.
> > > > >
> > > > > (b) will be true for a conformant IMF Composition, but I am not sure
> > > > > it can always be true for a malformed one.
> > > > >
> > > >
> > > > Can't we make it true by adding the relevant checks to read_header?
> > >
> > > Yes.
> > Can imf add or remove track when processing? Looks like the live
> > streaming change resolution or bitrate when playing.
>
> The number of tracks is fixed and determined when the Composition
> Playlist (CPL) is parsed.
>
> > >
> > > >
> > > > > I think imf_read_packet() can probably be hardened. Perhaps do this as
> > > > > a patch separately from addressing the coverity issue?
> > > > >
> > > > >> FYI: In Coverity's analysis there are loop iterations, but it just
> > > > >> assumed that track is not initialized in the loop (which boils down 
> > > > >> to
> > > > >> saying that it presumed the tracks' current_timestamp to be invalid
> > > > >> (denominator 0). I hope this can't happen.
> > > > >> (There is btw another issue: The initialization of minimum_timestamp
> > > > >> presumes that int are 32bit which need not be true.)
> > > > >
> > > > > INT32_MAX -> INT_MAX should fix this right?
> > > > >
> > > >
> > > > Yes.
> > > >
> > > > >>
> > > > >>>  return track;
> > > > >>>  }
> > > > >>>
> > > > >>> @@ -760,6 +761,8 @@ static int imf_read_packet(AVFormatContext *s, 
> > > > >>> AVPacket *pkt)
> > > > >>>  AVRational next_timestamp;
> > > > >>>
> > > > >>>  track = get_next_track_with_minimum_timestamp(s);
> > > > >>> +if (!track)
> > > > >>> +return AVERROR_INVALIDDATA;
> > > > >>>
> > > > >>>  ret = get_resource_context_for_timestamp(s, track, );
> > > > >>>  if (ret)
> > > > >>
> > > > >> ___
> > > > >> ffmpeg-devel mailing list
> > > > >> ffmpeg-devel@ffmpeg.org
> > > > >> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> > > > >>
> > > > >> To unsubscribe, visit link above, or email
> > > > >> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
> > > > > ___
> > > > > ffmpeg-devel mailing list
> > > > > ffmpeg-devel@ffmpeg.org
> > > > > 

[FFmpeg-devel] [PATCH v1 1/1] avfilter/vf_tonemap: add hermite tone mapping

2022-09-08 Thread mirsfang
From: mirs 

Signed-off-by: mirs 

Add Hermite tone mapping; the result looks close to a real HDR display.

format patch email

---
 libavfilter/vf_tonemap.c | 65 
 1 file changed, 65 insertions(+)

diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c
index d1087e6bd9..3fb2bdfa2c 100644
--- a/libavfilter/vf_tonemap.c
+++ b/libavfilter/vf_tonemap.c
@@ -48,6 +48,7 @@ enum TonemapAlgorithm {
 TONEMAP_REINHARD,
 TONEMAP_HABLE,
 TONEMAP_MOBIUS,
+TONEMAP_HERMITE,
 TONEMAP_MAX,
 };
 
@@ -106,6 +107,66 @@ static float mobius(float in, float j, double peak)
 return (b * b + 2.0f * b * j + j * j) / (b - a) * (in + a) / (in + b);
 }
 
+static float hermite(float in) {
+/*
+ * in theory, max mastering lumi 、max content lumi frome packet side data,
+ * display max lumi from Display system value,current value is 
R-REP-BT.2390 max value 
+ */
+float max_mastering_lumi = 1000.0f;
+float max_content_lumi = 1000.0f;
+float display_max_lumi = 400.0f; // Terminal domain 400NITS is the maximum 
lumen of a normal screen
+
+float max_in_lumi = FFMAX(max_mastering_lumi,max_content_lumi);
+float max_out_lumi = display_max_lumi;
+float nits = in * (display_max_lumi / max_mastering_lumi);
+
+// clamp
+if (nits < 0) {
+nits = 0.0;
+} else if (nits > max_in_lumi) {
+nits = max_in_lumi;
+}
+
+if (max_in_lumi <= max_out_lumi) {
+nits *= max_out_lumi / max_in_lumi;
+} else {
+// three control points
+const float x0 = 10.0f;
+const float y0 = 17.0;
+float x1 = max_out_lumi * 0.75;
+float y1 = x1;
+float x2 = x1 + (max_in_lumi - x1) / 2.0;
+float y2 = y1 + (max_out_lumi - y1) * 0.75;
+// horizontal distances between the last three control points
+float h12 = x2 - x1;
+float h23 = max_in_lumi - x2;
+// tangents at the last three control points
+float m1 = (y2 - y1) / h12;
+float m3 = (max_out_lumi - y2) / h23;
+float m2 = (m1 + m3) / 2.0;
+
+if (nits < x0) {
+// scale [0.0, x0] to [0.0, y0] linearly
+float slope = y0 / x0;
+nits *= slope;
+} else if (nits < x1) {
+// scale [x0, x1] to [y0, y1] linearly
+float slope = (y1 - y0) / (x1 - x0);
+nits = y0 + (nits - x0) * slope;
+} else if (nits < x2) {
+// scale [x1, x2] to [y1, y2] using Hermite interp
+float t = (nits - x1) / h12;
+nits = (y1 * (1.0 + 2.0 * t) + h12 * m1 * t) * (1.0 - t) * (1.0 - 
t) +(y2 * (3.0 - 2.0 * t) + h12 * m2 * (t - 1.0)) * t * t;
+} else { 
+// scale [x2, maxInLumi] to [y2, maxOutLumi] using Hermite interp
+float t = (nits - x2) / h23;
+nits = (y2 * (1.0 + 2.0 * t) + h23 * m2 * t) * (1.0 - t) * (1.0 - 
t) +(max_out_lumi * (3.0 - 2.0 * t) + h23 * m3 * (t - 1.0)) * t * t;
+}
+}
+
+return nits;
+}
+
 #define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a)
 static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in,
 const AVPixFmtDescriptor *desc, int x, int y, double peak)
@@ -163,6 +224,9 @@ static void tonemap(TonemapContext *s, AVFrame *out, const 
AVFrame *in,
 case TONEMAP_MOBIUS:
 sig = mobius(sig, s->param, peak);
 break;
+case TONEMAP_HERMITE:
+sig = hermite(sig);
+break;
 }
 
 /* apply the computed scale factor to the color,
@@ -291,6 +355,7 @@ static const AVOption tonemap_options[] = {
 { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD},  
0, 0, FLAGS, "tonemap" },
 { "hable",0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 
0, 0, FLAGS, "tonemap" },
 { "mobius",   0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS},
0, 0, FLAGS, "tonemap" },
+{ "hermite",  0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HERMITE},   
   0, 0, FLAGS, "tonemap" },
 { "param","tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, 
{.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS },
 { "desat","desaturation strength", OFFSET(desat), 
AV_OPT_TYPE_DOUBLE, {.dbl = 2}, 0, DBL_MAX, FLAGS },
 { "peak", "signal peak override", OFFSET(peak), 
AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS },
-- 
2.32.1 (Apple Git-133)

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 2/2] avcodec/dca_core: Only call emms_c() if needed

2022-09-08 Thread Andreas Rheinhardt
It is not needed on x64, because the AV_COPY* and AV_ZERO*
macros never use MMX on x64.

Signed-off-by: Andreas Rheinhardt 
---
 libavcodec/dca_core.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/libavcodec/dca_core.c b/libavcodec/dca_core.c
index 1655116eed..bbf36ea678 100644
--- a/libavcodec/dca_core.c
+++ b/libavcodec/dca_core.c
@@ -767,7 +767,9 @@ static void erase_adpcm_history(DCACoreDecoder *s)
 for (band = 0; band < DCA_SUBBANDS; band++)
 AV_ZERO128(s->subband_samples[ch][band] - DCA_ADPCM_COEFFS);
 
+#ifdef FF_COPY_SWAP_ZERO_USES_MMX
 emms_c();
+#endif
 }
 
 static int alloc_sample_buffer(DCACoreDecoder *s)
@@ -831,7 +833,9 @@ static int parse_frame_data(DCACoreDecoder *s, enum 
HeaderType header, int xch_b
 }
 }
 
+#ifdef FF_COPY_SWAP_ZERO_USES_MMX
 emms_c();
+#endif
 
 return 0;
 }
@@ -1276,7 +1280,9 @@ static void erase_x96_adpcm_history(DCACoreDecoder *s)
 for (band = 0; band < DCA_SUBBANDS_X96; band++)
 AV_ZERO128(s->x96_subband_samples[ch][band] - DCA_ADPCM_COEFFS);
 
+#ifdef FF_COPY_SWAP_ZERO_USES_MMX
 emms_c();
+#endif
 }
 
 static int alloc_x96_sample_buffer(DCACoreDecoder *s)
@@ -1506,7 +1512,9 @@ static int parse_x96_frame_data(DCACoreDecoder *s, int 
exss, int xch_base)
 }
 }
 
+#ifdef FF_COPY_SWAP_ZERO_USES_MMX
 emms_c();
+#endif
 
 return 0;
 }
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 1/2] avutil/x86/intreadwrite: Add ability to detect whether MMX code is used

2022-09-08 Thread Andreas Rheinhardt
It can be used to call emms_c() only when needed.

Signed-off-by: Andreas Rheinhardt 
---
 libavutil/x86/intreadwrite.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavutil/x86/intreadwrite.h b/libavutil/x86/intreadwrite.h
index 4061d19231..40f375b013 100644
--- a/libavutil/x86/intreadwrite.h
+++ b/libavutil/x86/intreadwrite.h
@@ -29,6 +29,8 @@
 
 #if !HAVE_FAST_64BIT && defined(__MMX__)
 
+#define FF_COPY_SWAP_ZERO_USES_MMX
+
 #define AV_COPY64 AV_COPY64
 static av_always_inline void AV_COPY64(void *d, const void *s)
 {
-- 
2.34.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] Fix bugs on Mips platform.

2022-09-08 Thread Hao Chen
[PATCH v1 1/2] Fix bugs in me_cmp_msa.c file.
[PATCH v1 2/2] Fix hevc decoding bugs on MIPS platform.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v1 1/2] Fix bugs in me_cmp_msa.c file.

2022-09-08 Thread Hao Chen
From: Lu Wang 

This patch fixes a bug where the fate-checkasm-motion fails when
h is not a multiple of 8.
---
 libavcodec/mips/me_cmp_msa.c | 201 ++-
 1 file changed, 173 insertions(+), 28 deletions(-)

diff --git a/libavcodec/mips/me_cmp_msa.c b/libavcodec/mips/me_cmp_msa.c
index 00a3cfd53f..351494161f 100644
--- a/libavcodec/mips/me_cmp_msa.c
+++ b/libavcodec/mips/me_cmp_msa.c
@@ -25,11 +25,13 @@ static uint32_t sad_8width_msa(const uint8_t *src, int32_t 
src_stride,
const uint8_t *ref, int32_t ref_stride,
int32_t height)
 {
-int32_t ht_cnt;
+int32_t ht_cnt = height >> 2;
+int res = (height & 0x03);
 v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3;
+v8u16 zero = { 0 };
 v8u16 sad = { 0 };
 
-for (ht_cnt = (height >> 2); ht_cnt--;) {
+for (; ht_cnt--; ) {
 LD_UB4(src, src_stride, src0, src1, src2, src3);
 src += (4 * src_stride);
 LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
@@ -39,6 +41,16 @@ static uint32_t sad_8width_msa(const uint8_t *src, int32_t 
src_stride,
 src0, src1, ref0, ref1);
 sad += SAD_UB2_UH(src0, src1, ref0, ref1);
 }
+for (; res--; ) {
+v16u8 diff;
+src0 = LD_UB(src);
+ref0 = LD_UB(ref);
+src += src_stride;
+ref += ref_stride;
+diff = __msa_asub_u_b((v16u8) src0, (v16u8) ref0);
+diff = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)diff);
+sad += __msa_hadd_u_h((v16u8) diff, (v16u8) diff);
+}
 
 return (HADD_UH_U32(sad));
 }
@@ -47,11 +59,12 @@ static uint32_t sad_16width_msa(const uint8_t *src, int32_t 
src_stride,
 const uint8_t *ref, int32_t ref_stride,
 int32_t height)
 {
-int32_t ht_cnt;
+int32_t ht_cnt = height >> 2;
+int res = (height & 0x03);
 v16u8 src0, src1, ref0, ref1;
 v8u16 sad = { 0 };
 
-for (ht_cnt = (height >> 2); ht_cnt--;) {
+for (; ht_cnt--; ) {
 LD_UB2(src, src_stride, src0, src1);
 src += (2 * src_stride);
 LD_UB2(ref, ref_stride, ref0, ref1);
@@ -64,7 +77,15 @@ static uint32_t sad_16width_msa(const uint8_t *src, int32_t 
src_stride,
 ref += (2 * ref_stride);
 sad += SAD_UB2_UH(src0, src1, ref0, ref1);
 }
-
+for (; res > 0; res--) {
+v16u8 diff;
+src0 = LD_UB(src);
+ref0 = LD_UB(ref);
+src += src_stride;
+ref += ref_stride;
+diff = __msa_asub_u_b((v16u8) src0, (v16u8) ref0);
+sad += __msa_hadd_u_h((v16u8) diff, (v16u8) diff);
+}
 return (HADD_UH_U32(sad));
 }
 
@@ -74,12 +95,14 @@ static uint32_t sad_horiz_bilinear_filter_8width_msa(const 
uint8_t *src,
  int32_t ref_stride,
  int32_t height)
 {
-int32_t ht_cnt;
+int32_t ht_cnt = height >> 3;
+int32_t res = height & 0x07;
 v16u8 src0, src1, src2, src3, comp0, comp1;
 v16u8 ref0, ref1, ref2, ref3, ref4, ref5;
+v8u16 zero = { 0 };
 v8u16 sad = { 0 };
 
-for (ht_cnt = (height >> 3); ht_cnt--;) {
+for (; ht_cnt--; ) {
 LD_UB4(src, src_stride, src0, src1, src2, src3);
 src += (4 * src_stride);
 LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3);
@@ -107,6 +130,18 @@ static uint32_t sad_horiz_bilinear_filter_8width_msa(const 
uint8_t *src,
 sad += SAD_UB2_UH(src0, src1, comp0, comp1);
 }
 
+for (; res--; ) {
+v16u8 diff;
+src0 = LD_UB(src);
+ref0 = LD_UB(ref);
+ref1 = LD_UB(ref + 1);
+src += src_stride;
+ref += ref_stride;
+comp0 = (v16u8)__msa_aver_u_b((v16u8) ref0, (v16u8) ref1);
+diff = __msa_asub_u_b((v16u8) src0, (v16u8) comp0);
+diff = (v16u8)__msa_ilvr_d((v2i64) zero, (v2i64) diff);
+sad += __msa_hadd_u_h((v16u8) diff, (v16u8) diff);
+}
 return (HADD_UH_U32(sad));
 }
 
@@ -116,12 +151,13 @@ static uint32_t 
sad_horiz_bilinear_filter_16width_msa(const uint8_t *src,
   int32_t ref_stride,
   int32_t height)
 {
-int32_t ht_cnt;
+int32_t ht_cnt = height >> 3;
+int32_t res = height & 0x07;
 v16u8 src0, src1, src2, src3, comp0, comp1;
 v16u8 ref00, ref10, ref20, ref30, ref01, ref11, ref21, ref31;
 v8u16 sad = { 0 };
 
-for (ht_cnt = (height >> 3); ht_cnt--;) {
+for (; ht_cnt--; ) {
 LD_UB4(src, src_stride, src0, src1, src2, src3);
 src += (4 * src_stride);
 LD_UB4(ref, ref_stride, ref00, ref10, ref20, ref30);
@@ -145,6 +181,17 @@ static uint32_t 
sad_horiz_bilinear_filter_16width_msa(const uint8_t *src,
 sad += SAD_UB2_UH(src2, src3, comp0, comp1);
 }
 
+for (; res--; ) {
+v16u8 diff;
+src0  = LD_UB(src);
+  

[FFmpeg-devel] [PATCH v1 1/1] avfilter/vf_tonemap: add hermite tone mapping

2022-09-08 Thread mirsfang
From: mirs 

Signed-off-by: mirs 

Add Hermite tone mapping; the result looks close to a real HDR display.

---
 libavfilter/vf_tonemap.c | 65 
 1 file changed, 65 insertions(+)

diff --git a/libavfilter/vf_tonemap.c b/libavfilter/vf_tonemap.c
index d1087e6bd9..3fb2bdfa2c 100644
--- a/libavfilter/vf_tonemap.c
+++ b/libavfilter/vf_tonemap.c
@@ -48,6 +48,7 @@ enum TonemapAlgorithm {
 TONEMAP_REINHARD,
 TONEMAP_HABLE,
 TONEMAP_MOBIUS,
+TONEMAP_HERMITE,
 TONEMAP_MAX,
 };
 
@@ -106,6 +107,66 @@ static float mobius(float in, float j, double peak)
 return (b * b + 2.0f * b * j + j * j) / (b - a) * (in + a) / (in + b);
 }
 
+static float hermite(float in) {
+/*
+ * in theory, max mastering lumi 、max content lumi frome packet side data,
+ * display max lumi from Display system value,current value is 
R-REP-BT.2390 max value 
+ */
+float max_mastering_lumi = 1000.0f;
+float max_content_lumi = 1000.0f;
+float display_max_lumi = 400.0f; // Terminal domain 400NITS is the maximum 
lumen of a normal screen
+
+float max_in_lumi = FFMAX(max_mastering_lumi,max_content_lumi);
+float max_out_lumi = display_max_lumi;
+float nits = in * (display_max_lumi / max_mastering_lumi);
+
+// clamp
+if (nits < 0) {
+nits = 0.0;
+} else if (nits > max_in_lumi) {
+nits = max_in_lumi;
+}
+
+if (max_in_lumi <= max_out_lumi) {
+nits *= max_out_lumi / max_in_lumi;
+} else {
+// three control points
+const float x0 = 10.0f;
+const float y0 = 17.0;
+float x1 = max_out_lumi * 0.75;
+float y1 = x1;
+float x2 = x1 + (max_in_lumi - x1) / 2.0;
+float y2 = y1 + (max_out_lumi - y1) * 0.75;
+// horizontal distances between the last three control points
+float h12 = x2 - x1;
+float h23 = max_in_lumi - x2;
+// tangents at the last three control points
+float m1 = (y2 - y1) / h12;
+float m3 = (max_out_lumi - y2) / h23;
+float m2 = (m1 + m3) / 2.0;
+
+if (nits < x0) {
+// scale [0.0, x0] to [0.0, y0] linearly
+float slope = y0 / x0;
+nits *= slope;
+} else if (nits < x1) {
+// scale [x0, x1] to [y0, y1] linearly
+float slope = (y1 - y0) / (x1 - x0);
+nits = y0 + (nits - x0) * slope;
+} else if (nits < x2) {
+// scale [x1, x2] to [y1, y2] using Hermite interp
+float t = (nits - x1) / h12;
+nits = (y1 * (1.0 + 2.0 * t) + h12 * m1 * t) * (1.0 - t) * (1.0 - 
t) +(y2 * (3.0 - 2.0 * t) + h12 * m2 * (t - 1.0)) * t * t;
+} else { 
+// scale [x2, maxInLumi] to [y2, maxOutLumi] using Hermite interp
+float t = (nits - x2) / h23;
+nits = (y2 * (1.0 + 2.0 * t) + h23 * m2 * t) * (1.0 - t) * (1.0 - 
t) +(max_out_lumi * (3.0 - 2.0 * t) + h23 * m3 * (t - 1.0)) * t * t;
+}
+}
+
+return nits;
+}
+
 #define MIX(x,y,a) (x) * (1 - (a)) + (y) * (a)
 static void tonemap(TonemapContext *s, AVFrame *out, const AVFrame *in,
 const AVPixFmtDescriptor *desc, int x, int y, double peak)
@@ -163,6 +224,9 @@ static void tonemap(TonemapContext *s, AVFrame *out, const 
AVFrame *in,
 case TONEMAP_MOBIUS:
 sig = mobius(sig, s->param, peak);
 break;
+case TONEMAP_HERMITE:
+sig = hermite(sig);
+break;
 }
 
 /* apply the computed scale factor to the color,
@@ -291,6 +355,7 @@ static const AVOption tonemap_options[] = {
 { "reinhard", 0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_REINHARD},  
0, 0, FLAGS, "tonemap" },
 { "hable",0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HABLE}, 
0, 0, FLAGS, "tonemap" },
 { "mobius",   0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_MOBIUS},
0, 0, FLAGS, "tonemap" },
+{ "hermite",  0, 0, AV_OPT_TYPE_CONST, {.i64 = TONEMAP_HERMITE},   
   0, 0, FLAGS, "tonemap" },
 { "param","tonemap parameter", OFFSET(param), AV_OPT_TYPE_DOUBLE, 
{.dbl = NAN}, DBL_MIN, DBL_MAX, FLAGS },
 { "desat","desaturation strength", OFFSET(desat), 
AV_OPT_TYPE_DOUBLE, {.dbl = 2}, 0, DBL_MAX, FLAGS },
 { "peak", "signal peak override", OFFSET(peak), 
AV_OPT_TYPE_DOUBLE, {.dbl = 0}, 0, DBL_MAX, FLAGS },
-- 
2.32.1 (Apple Git-133)

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avcodec: Vorbis decode: don't use a flag to determine if frames have been output

2022-09-08 Thread Paul B Mahol
On 9/8/22, jyrk...@nekonyansoft.com  wrote:
> From: Jyrki Vesterinen 
>
> If a developer using FFmpeg libraries seeks into an earlier position and
> calls
> avcodec_flush_buffers() afterwards as recommended, the Vorbis decoder will
> drop
> the next frame, since buffer flushing clears the first_frame flag. As a
> result,
> the audio samples the calling code receives may be ahead of the requested
> seek
> position, which is unacceptable in some use cases such as playing a looping
> sound effect.
>
> This commit records the presentation timestamp of the first frame and
> determines after that if the new frame is the first frame (possible after
> seeking to the start) by comparing its pts to the stored pts.
> ---
>  libavcodec/vorbisdec.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
> index 4d03947c49..38a5367be3 100644
> --- a/libavcodec/vorbisdec.c
> +++ b/libavcodec/vorbisdec.c
> @@ -131,6 +131,7 @@ typedef struct vorbis_context_s {
>
>  FFTContext mdct[2];
>  uint8_t   first_frame;
> +int64_t   initial_pts;
>  uint32_t  version;
>  uint8_t   audio_channels;
>  uint32_t  audio_samplerate;
> @@ -1847,6 +1848,10 @@ static int vorbis_decode_frame(AVCodecContext *avctx,
> AVFrame *frame,
>
>  if (!vc->first_frame) {
>  vc->first_frame = 1;
> +vc->initial_pts = frame->pts;
> +}
> +
> +if (frame->pts == vc->initial_pts) {
>  *got_frame_ptr = 0;
>  av_frame_unref(frame);
>  return buf_size;
> @@ -1881,7 +1886,6 @@ static av_cold void vorbis_decode_flush(AVCodecContext
> *avctx)
>   sizeof(*vc->saved));
>  }
>  vc->previous_window = -1;
> -vc->first_frame = 0;
>  }
>
>  const FFCodec ff_vorbis_decoder = {
> --
> 2.37.2.windows.2
>


LGTM

> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avcodec: Vorbis decode: don't use a flag to determine if frames have been output

2022-09-08 Thread jyrkive
From: Jyrki Vesterinen 

If a developer using FFmpeg libraries seeks into an earlier position and calls
avcodec_flush_buffers() afterwards as recommended, the Vorbis decoder will drop
the next frame, since buffer flushing clears the first_frame flag. As a result,
the audio samples the calling code receives may be ahead of the requested seek
position, which is unacceptable in some use cases such as playing a looping
sound effect.

This commit records the presentation timestamp of the first frame and
determines after that if the new frame is the first frame (possible after
seeking to the start) by comparing its pts to the stored pts.
---
 libavcodec/vorbisdec.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index 4d03947c49..38a5367be3 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -131,6 +131,7 @@ typedef struct vorbis_context_s {
 
 FFTContext mdct[2];
 uint8_t   first_frame;
+int64_t   initial_pts;
 uint32_t  version;
 uint8_t   audio_channels;
 uint32_t  audio_samplerate;
@@ -1847,6 +1848,10 @@ static int vorbis_decode_frame(AVCodecContext *avctx, 
AVFrame *frame,
 
 if (!vc->first_frame) {
 vc->first_frame = 1;
+vc->initial_pts = frame->pts;
+}
+
+if (frame->pts == vc->initial_pts) {
 *got_frame_ptr = 0;
 av_frame_unref(frame);
 return buf_size;
@@ -1881,7 +1886,6 @@ static av_cold void vorbis_decode_flush(AVCodecContext 
*avctx)
  sizeof(*vc->saved));
 }
 vc->previous_window = -1;
-vc->first_frame = 0;
 }
 
 const FFCodec ff_vorbis_decoder = {
-- 
2.37.2.windows.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avcodec: Vorbis decode: don't use a flag to determine if frames have been output

2022-09-08 Thread jyrkive
Thanks, Paul. I'm not very familiar with the FFmpeg codebase. This new patch 
attempts to implement your suggestion. Works fine in my tests, at least.


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] ffmpeg: fix increment of sync_opts

2022-09-08 Thread Xiaolei Yu
The encoder time base could have been overridden by a command line argument
and may not always be 1/ost->frame_rate.
---
 fftools/ffmpeg.c |  2 +-
 tests/fate/ffmpeg.mak|  5 +
 tests/ref/fate/enc_time_base-vfr | 25 +
 3 files changed, 31 insertions(+), 1 deletion(-)
 create mode 100644 tests/ref/fate/enc_time_base-vfr

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 0e1477299d..8bc48125c8 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -1344,7 +1344,7 @@ static void do_video_out(OutputFile *of,
 if (ret < 0 && ret != AVERROR_EOF)
 exit_program(1);
 
-ost->sync_opts++;
+ost->sync_opts += av_rescale_q(1, av_inv_q(ost->frame_rate), 
ost->enc_ctx->time_base);
 ost->vsync_frame_number++;
 }
 
diff --git a/tests/fate/ffmpeg.mak b/tests/fate/ffmpeg.mak
index 38a1ae7ed5..a3be79d65e 100644
--- a/tests/fate/ffmpeg.mak
+++ b/tests/fate/ffmpeg.mak
@@ -209,3 +209,8 @@ FATE_TIME_BASE-$(call PARSERDEMDEC, MPEGVIDEO, MPEGPS, 
MPEG2VIDEO, MPEGVIDEO_DEM
 fate-time_base: CMD = md5 -i $(TARGET_SAMPLES)/mpeg2/dvd_single_frame.vob -an 
-sn -c:v copy -r 25 -time_base 1001:3 -fflags +bitexact -f mxf
 
 FATE_SAMPLES_FFMPEG-yes += $(FATE_TIME_BASE-yes)
+
+FATE_ENC_TIME_BASE-$(call ALLYES, LAVFI_INDEV COLOR_FILTER FRAMEMD5_MUXER) += 
fate-enc_time_base-vfr
+fate-enc_time_base-vfr: CMD = framemd5 -auto_conversion_filters -fflags 
+bitexact -flags +bitexact -f lavfi -i "color=r=30:d=1" -enc_time_base 1/600 
-fps_mode vfr -r 15
+
+FATE_SAMPLES_FFMPEG-yes += $(FATE_ENC_TIME_BASE-yes)
diff --git a/tests/ref/fate/enc_time_base-vfr b/tests/ref/fate/enc_time_base-vfr
new file mode 100644
index 00..cb38177d8e
--- /dev/null
+++ b/tests/ref/fate/enc_time_base-vfr
@@ -0,0 +1,25 @@
+#format: frame checksums
+#version: 2
+#hash: MD5
+#tb 0: 1/600
+#media_type 0: video
+#codec_id 0: rawvideo
+#dimensions 0: 320x240
+#sar 0: 1/1
+#stream#, dts,pts, duration, size, hash
+0,  0,  0,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0, 40, 40,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0, 80, 80,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,120,120,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,160,160,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,200,200,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,240,240,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,280,280,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,320,320,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,360,360,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,400,400,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,440,440,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,480,480,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,520,520,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,560,560,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
+0,600,600,   40,   115200, 8e4dd5c5c31a54672e30503f6ee13321
-- 
2.37.2
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avcodec: Vorbis decode: don't use a flag to determine if frames have been output

2022-09-08 Thread Paul B Mahol
On Thu, Sep 8, 2022 at 10:26 AM  wrote:

> From: Jyrki Vesterinen 
>
> If a developer using FFmpeg libraries seeks into an earlier position and
> calls
> avcodec_flush_buffers() afterwards as recommended, the Vorbis decoder will
> drop
> the next frame, since buffer flushing clears the first_frame flag. As a
> result,
> the audio samples the calling code receives may be ahead of the requested
> seek
> position, which is unacceptable in some use cases such as playing a looping
> sound effect.
>
> This commit removes the first_frame flag entirely and instead uses the
> presentation timestamp to determine if it's the first frame.
>

The proper solution is to fetch the initial/first pts and use that one
instead of using the fragile pts < 0 check.


> ---
>  libavcodec/vorbisdec.c | 5 +
>  1 file changed, 1 insertion(+), 4 deletions(-)
>
> diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
> index 4d03947c49..d4b030d7b9 100644
> --- a/libavcodec/vorbisdec.c
> +++ b/libavcodec/vorbisdec.c
> @@ -130,7 +130,6 @@ typedef struct vorbis_context_s {
>  AVFloatDSPContext *fdsp;
>
>  FFTContext mdct[2];
> -uint8_t   first_frame;
>  uint32_t  version;
>  uint8_t   audio_channels;
>  uint32_t  audio_samplerate;
> @@ -1845,8 +1844,7 @@ static int vorbis_decode_frame(AVCodecContext
> *avctx, AVFrame *frame,
>  if ((len = vorbis_parse_audio_packet(vc, channel_ptrs)) <= 0)
>  return len;
>
> -if (!vc->first_frame) {
> -vc->first_frame = 1;
> +if (frame->pts < 0) {
>  *got_frame_ptr = 0;
>  av_frame_unref(frame);
>  return buf_size;
> @@ -1881,7 +1879,6 @@ static av_cold void
> vorbis_decode_flush(AVCodecContext *avctx)
>   sizeof(*vc->saved));
>  }
>  vc->previous_window = -1;
> -vc->first_frame = 0;
>  }
>
>  const FFCodec ff_vorbis_decoder = {
> --
> 2.37.2.windows.2
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avcodec: Vorbis decode: don't use a flag to determine if frames have been output

2022-09-08 Thread jyrkive
From: Jyrki Vesterinen 

If a developer using FFmpeg libraries seeks into an earlier position and calls
avcodec_flush_buffers() afterwards as recommended, the Vorbis decoder will drop
the next frame, since buffer flushing clears the first_frame flag. As a result,
the audio samples the calling code receives may be ahead of the requested seek
position, which is unacceptable in some use cases such as playing a looping
sound effect.

This commit removes the first_frame flag entirely and instead uses the
presentation timestamp to determine if it's the first frame.
---
 libavcodec/vorbisdec.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index 4d03947c49..d4b030d7b9 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -130,7 +130,6 @@ typedef struct vorbis_context_s {
 AVFloatDSPContext *fdsp;
 
 FFTContext mdct[2];
-uint8_t   first_frame;
 uint32_t  version;
 uint8_t   audio_channels;
 uint32_t  audio_samplerate;
@@ -1845,8 +1844,7 @@ static int vorbis_decode_frame(AVCodecContext *avctx, 
AVFrame *frame,
 if ((len = vorbis_parse_audio_packet(vc, channel_ptrs)) <= 0)
 return len;
 
-if (!vc->first_frame) {
-vc->first_frame = 1;
+if (frame->pts < 0) {
 *got_frame_ptr = 0;
 av_frame_unref(frame);
 return buf_size;
@@ -1881,7 +1879,6 @@ static av_cold void vorbis_decode_flush(AVCodecContext 
*avctx)
  sizeof(*vc->saved));
 }
 vc->previous_window = -1;
-vc->first_frame = 0;
 }
 
 const FFCodec ff_vorbis_decoder = {
-- 
2.37.2.windows.2

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".