Re: [FFmpeg-devel] [PATCH] avformat/matroska: Support HDR10+ metadata in Matroska.

2021-08-17 Thread Gyan Doshi



On 2021-08-18 04:10 am, Mohammad Izadi wrote:

From: Gyan Doshi 


Can you refresh my memory on how I'm involved?



The fate test file can be found here: 
https://drive.google.com/file/d/1jGW3f94rglLfr5WGmMQe3SEnp1YkbMRy/view?usp=drivesdk
The video file needs to be copied to fate-suite/mkv/
---
  libavcodec/dynamic_hdr10_plus.c | 273 +---
  libavcodec/dynamic_hdr10_plus.h |  35 ++-
  libavformat/matroska.h  |   5 +
  libavformat/matroskadec.c   |  30 ++-
  libavformat/matroskaenc.c   |  47 ++--
  tests/fate/matroska.mak |   6 +
  tests/ref/fate/matroska-hdr10-plus-metadata | 150 +++
  7 files changed, 484 insertions(+), 62 deletions(-)
  create mode 100644 tests/ref/fate/matroska-hdr10-plus-metadata

diff --git a/libavcodec/dynamic_hdr10_plus.c b/libavcodec/dynamic_hdr10_plus.c
index a602e606ed..df7828a476 100644
--- a/libavcodec/dynamic_hdr10_plus.c
+++ b/libavcodec/dynamic_hdr10_plus.c
@@ -18,6 +18,12 @@
  
  #include "dynamic_hdr10_plus.h"

  #include "get_bits.h"
+#include "put_bits.h"
+
+static const uint8_t usa_country_code = 0xB5;
+static const uint16_t smpte_provider_code = 0x003C;
+static const uint16_t smpte2094_40_provider_oriented_code = 0x0001;
+static const uint16_t smpte2094_40_application_identifier = 0x04;
  
  static const int64_t luminance_den = 1;

  static const int32_t peak_luminance_den = 15;
@@ -27,8 +33,8 @@ static const int32_t knee_point_den = 4095;
  static const int32_t bezier_anchor_den = 1023;
  static const int32_t saturation_weight_den = 8;
  
-int ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus *s, const uint8_t *data,

- int size)
+int ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus* s, const 
uint8_t* data,
+int size)
  {
  GetBitContext gbc, *gb = &gbc;
  int ret;
@@ -40,7 +46,9 @@ int ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus 
*s, const uint8_t
  if (ret < 0)
  return ret;
  
-s->application_version = get_bits(gb, 8);

+if (get_bits_left(gb) < 8)
+return AVERROR_INVALIDDATA;
+ s->application_version = get_bits(gb, 8);
  
  if (get_bits_left(gb) < 2)

  return AVERROR_INVALIDDATA;
@@ -56,15 +64,11 @@ int 
ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus *s, const uint8_t
  for (int w = 1; w < s->num_windows; w++) {
  // The corners are set to absolute coordinates here. They should be
  // converted to the relative coordinates (in [0, 1]) in the decoder.
-AVHDRPlusColorTransformParams *params = &s->params[w];
-params->window_upper_left_corner_x =
-(AVRational){get_bits(gb, 16), 1};
-params->window_upper_left_corner_y =
-(AVRational){get_bits(gb, 16), 1};
-params->window_lower_right_corner_x =
-(AVRational){get_bits(gb, 16), 1};
-params->window_lower_right_corner_y =
-(AVRational){get_bits(gb, 16), 1};
+AVHDRPlusColorTransformParams* params = &s->params[w];
+params->window_upper_left_corner_x = (AVRational) { get_bits(gb, 16), 
1 };
+params->window_upper_left_corner_y = (AVRational) { get_bits(gb, 16), 
1 };
+params->window_lower_right_corner_x = (AVRational) { get_bits(gb, 16), 
1 };
+params->window_lower_right_corner_y = (AVRational) { get_bits(gb, 16), 
1 };
  
  params->center_of_ellipse_x = get_bits(gb, 16);

  params->center_of_ellipse_y = get_bits(gb, 16);
@@ -78,8 +82,7 @@ int ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus 
*s, const uint8_t
  if (get_bits_left(gb) < 28)
  return AVERROR(EINVAL);
  
-s->targeted_system_display_maximum_luminance =

-(AVRational){get_bits_long(gb, 27), luminance_den};
+s->targeted_system_display_maximum_luminance = (AVRational) { 
get_bits_long(gb, 27), luminance_den };
  s->targeted_system_display_actual_peak_luminance_flag = get_bits1(gb);
  
  if (s->targeted_system_display_actual_peak_luminance_flag) {

@@ -99,38 +102,33 @@ int 
ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus *s, const uint8_t
  
  for (int i = 0; i < rows; i++) {

  for (int j = 0; j < cols; j++) {
-s->targeted_system_display_actual_peak_luminance[i][j] =
-(AVRational){get_bits(gb, 4), peak_luminance_den};
+s->targeted_system_display_actual_peak_luminance[i][j] = 
(AVRational) { get_bits(gb, 4), peak_luminance_den };
  }
  }
  }
  for (int w = 0; w < s->num_windows; w++) {
-AVHDRPlusColorTransformParams *params = &s->params[w];
+AVHDRPlusColorTransformParams* params = &s->params[w];
  if (get_bits_left(gb) < (3 * 17 + 17 + 4))
  return AVERROR(EINVAL);
  
  for (int i = 0; i < 3; i++) {

-

Re: [FFmpeg-devel] [PATCH] configure: set IceLake-AVX512 as the minimum baseline

2021-08-17 Thread Wu, Jianhua
James Almer wrote:
> On 8/17/2021 4:24 PM, James Almer wrote:
> > On 8/17/2021 12:25 PM, Ronald S. Bultje wrote:
> >> Hi,
> >>
> >> On Tue, Aug 17, 2021 at 2:33 AM Hendrik Leppkes 
> >> wrote:
> >>
> >>> On Tue, Aug 17, 2021 at 8:30 AM Wu Jianhua 
> wrote:
>  Based on IceLake-AVX512 and newer architecture, a broad range of
>  the subsets of AVX512 could be supported.
> >>>
> >> [..]
> >>
>  -    enabled avx512 && check_x86asm avx512_external "vmovdqa32
> >>> [eax]{k1}{z}, zmm0"
>  +    # Only IceLake and newer architectures could enable AVX512
>  +    #
> >>>
> F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
>  +    enabled avx512 && check_x86asm avx512_external "vpdpwssds
> >>> zmm31{k1}{z}, zmm29, zmm28"
>    enabled avx2   && check_x86asm avx2_external
>  "vextracti128
> >>> xmm0, ymm0, 0"
>    enabled xop    && check_x86asm xop_external    "vpmacsdd
>  xmm0,
> >>> xmm1, xmm2, xmm3"
>    enabled fma4   && check_x86asm fma4_external   "vfmaddps
>  ymm0,
> >>> ymm1, ymm2, ymm3"
> >>>
> >>> Note that you are just checking the functionality of the assembler
> >>> here, not having a runtime impact.
> >>> What you would likely want is to update avutil/x86/cpu.c as well to
> >>> only enable the AVX512 flag on those CPUs.
> >>>
> >>
> >> [After IRC discussion] you want runtime checks for the
> >> variants/combinations-of-subsets that we want to support. Right now,
> >> avx512
> >> means skylake, so you may want to rename that flag to "avx512skl",
> >> and add a new runtime flag + check for the icelake subset called
> >> "avx512icl".
> >> Then
> >> in your implementations, you use the appropriate flag, and code
> >> components can individually choose to use skylake- and/or
> >> icelake-optimized avx512 functions.
> >
> > Does it really mean Skylake-X? Afaik the flag checks in cpu.c
> > currently look for AVX-512 Foundation and ZMM support, so it means
> > Knights Landing or newer.
> 
> Nevermind, just noticed the comment in cpu.c that mentions the Skylake-X
> extensions.
> 
> >
> > What about just making the existing AVX512 flag mean F+VL+DQ+BW, so
> > Skylake-X (Anything older just lacks useful instructions for
> > multimedia), and if needed for this new code add a new avx512icl flag
> > that also looks for something like GFNI.
> 
> Assuming making Ice Lake the minimum supported SKU is not acceptable,
> then your suggestion is fine (Sans the renaming, since it's a breaking 
> change).
> 
Hi,

The reason we set IceLake-AVX512 as the minimum baseline is that we don't
want FFmpeg to run on a processor that downclocks heavily; hence, keeping
the current flag and adding a new AVX512ICL flag makes no sense.

I intend to add a cpuid check for F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/
VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ in cpu.c in the v2 patch,
so there is no need to change the flags.

Jianhua
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 2/2] avcodec/libx265: improve full range flag setting logic

2021-08-17 Thread myp...@gmail.com
On Wed, Aug 18, 2021 at 5:01 AM Jan Ekström  wrote:
>
> Unlike libx264, libx265 does not have a separate "unspecified"/"auto"
> default for color range, so we do always have to specify it.
> Thus, we are required to handle the RGB case on the libavcodec
> side to enable the correct value to be written out in case
> of RGB content with unspecified color range being received.
>
> In other words:
> 1. If the user has set color range specifically, follow that.
> 2. If the user has not set color range specifically, set full
>range by default in case of RGB and YUVJ pixel formats.
> ---
>  libavcodec/libx265.c | 13 +
>  1 file changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/libavcodec/libx265.c b/libavcodec/libx265.c
> index 9395120471..b5c94b64a3 100644
> --- a/libavcodec/libx265.c
> +++ b/libavcodec/libx265.c
> @@ -181,10 +181,15 @@ static av_cold int libx265_encode_init(AVCodecContext 
> *avctx)
>
>  ctx->params->vui.bEnableVideoSignalTypePresentFlag = 1;
>
> -ctx->params->vui.bEnableVideoFullRangeFlag = avctx->pix_fmt == 
> AV_PIX_FMT_YUVJ420P ||
> - avctx->pix_fmt == 
> AV_PIX_FMT_YUVJ422P ||
> - avctx->pix_fmt == 
> AV_PIX_FMT_YUVJ444P ||
> - avctx->color_range == 
> AVCOL_RANGE_JPEG;
> +if (avctx->color_range != AVCOL_RANGE_UNSPECIFIED)
> +ctx->params->vui.bEnableVideoFullRangeFlag =
> +avctx->color_range == AVCOL_RANGE_JPEG;
> +else
> +ctx->params->vui.bEnableVideoFullRangeFlag =
> +(av_pix_fmt_desc_get(avctx->pix_fmt)->flags & 
> AV_PIX_FMT_FLAG_RGB) ||
> +avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
> +avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
> +avctx->pix_fmt == AV_PIX_FMT_YUVJ444P;
>
>  if ((avctx->color_primaries <= AVCOL_PRI_SMPTE432 &&
>   avctx->color_primaries != AVCOL_PRI_UNSPECIFIED) ||
> --
> 2.31.1
Patchset LGTM
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v3] avcodec/h264dec: apply H.274 film grain

2021-08-17 Thread James Almer

On 8/17/2021 4:54 PM, Niklas Haas wrote:

From: Niklas Haas 

Because we need access to ref frames without film grain applied, we have
to add an extra AVFrame to H264Picture to avoid messing with the
original. This requires some amount of overhead to make the reference
moves work out, but it allows us to benefit from frame multithreading
for film grain application "for free".

Unfortunately, this approach requires twice as much RAM to be constantly
allocated for ref frames, due to the need for an extra buffer per
H264Picture. In theory, we could get away with freeing up this memory as
soon as it's no longer needed (since ref frames do not need film grain
buffers any longer), but trying to call ff_thread_release_buffer() from
output_frame() conflicts with possible later accesses to that same frame
and I'm not sure how to synchronize that well.

Tested on all three cases of (no fg), (fg present but exported) and (fg
present and not exported), with and without threading.

Signed-off-by: Niklas Haas 
---
  libavcodec/h264_picture.c | 35 +++--
  libavcodec/h264_slice.c   | 16 ++--
  libavcodec/h264dec.c  | 55 ++-
  libavcodec/h264dec.h  |  6 +
  4 files changed, 90 insertions(+), 22 deletions(-)


Looks good now, so I'll apply it soon.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] avformat/matroska: Support HDR10+ metadata in Matroska.

2021-08-17 Thread Mohammad Izadi
From: Gyan Doshi 

The fate test file can be found here: 
https://drive.google.com/file/d/1jGW3f94rglLfr5WGmMQe3SEnp1YkbMRy/view?usp=drivesdk
The video file needs to be copied to fate-suite/mkv/
---
 libavcodec/dynamic_hdr10_plus.c | 273 +---
 libavcodec/dynamic_hdr10_plus.h |  35 ++-
 libavformat/matroska.h  |   5 +
 libavformat/matroskadec.c   |  30 ++-
 libavformat/matroskaenc.c   |  47 ++--
 tests/fate/matroska.mak |   6 +
 tests/ref/fate/matroska-hdr10-plus-metadata | 150 +++
 7 files changed, 484 insertions(+), 62 deletions(-)
 create mode 100644 tests/ref/fate/matroska-hdr10-plus-metadata

diff --git a/libavcodec/dynamic_hdr10_plus.c b/libavcodec/dynamic_hdr10_plus.c
index a602e606ed..df7828a476 100644
--- a/libavcodec/dynamic_hdr10_plus.c
+++ b/libavcodec/dynamic_hdr10_plus.c
@@ -18,6 +18,12 @@
 
 #include "dynamic_hdr10_plus.h"
 #include "get_bits.h"
+#include "put_bits.h"
+
+static const uint8_t usa_country_code = 0xB5;
+static const uint16_t smpte_provider_code = 0x003C;
+static const uint16_t smpte2094_40_provider_oriented_code = 0x0001;
+static const uint16_t smpte2094_40_application_identifier = 0x04;
 
 static const int64_t luminance_den = 1;
 static const int32_t peak_luminance_den = 15;
@@ -27,8 +33,8 @@ static const int32_t knee_point_den = 4095;
 static const int32_t bezier_anchor_den = 1023;
 static const int32_t saturation_weight_den = 8;
 
-int ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus *s, const 
uint8_t *data,
- int size)
+int ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus* s, const 
uint8_t* data,
+int size)
 {
 GetBitContext gbc, *gb = &gbc;
 int ret;
@@ -40,7 +46,9 @@ int ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus 
*s, const uint8_t
 if (ret < 0)
 return ret;
 
-s->application_version = get_bits(gb, 8);
+if (get_bits_left(gb) < 8)
+return AVERROR_INVALIDDATA;
+ s->application_version = get_bits(gb, 8);
 
 if (get_bits_left(gb) < 2)
 return AVERROR_INVALIDDATA;
@@ -56,15 +64,11 @@ int 
ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus *s, const uint8_t
 for (int w = 1; w < s->num_windows; w++) {
 // The corners are set to absolute coordinates here. They should be
 // converted to the relative coordinates (in [0, 1]) in the decoder.
-AVHDRPlusColorTransformParams *params = &s->params[w];
-params->window_upper_left_corner_x =
-(AVRational){get_bits(gb, 16), 1};
-params->window_upper_left_corner_y =
-(AVRational){get_bits(gb, 16), 1};
-params->window_lower_right_corner_x =
-(AVRational){get_bits(gb, 16), 1};
-params->window_lower_right_corner_y =
-(AVRational){get_bits(gb, 16), 1};
+AVHDRPlusColorTransformParams* params = &s->params[w];
+params->window_upper_left_corner_x = (AVRational) { get_bits(gb, 16), 
1 };
+params->window_upper_left_corner_y = (AVRational) { get_bits(gb, 16), 
1 };
+params->window_lower_right_corner_x = (AVRational) { get_bits(gb, 16), 
1 };
+params->window_lower_right_corner_y = (AVRational) { get_bits(gb, 16), 
1 };
 
 params->center_of_ellipse_x = get_bits(gb, 16);
 params->center_of_ellipse_y = get_bits(gb, 16);
@@ -78,8 +82,7 @@ int ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus 
*s, const uint8_t
 if (get_bits_left(gb) < 28)
 return AVERROR(EINVAL);
 
-s->targeted_system_display_maximum_luminance =
-(AVRational){get_bits_long(gb, 27), luminance_den};
+s->targeted_system_display_maximum_luminance = (AVRational) { 
get_bits_long(gb, 27), luminance_den };
 s->targeted_system_display_actual_peak_luminance_flag = get_bits1(gb);
 
 if (s->targeted_system_display_actual_peak_luminance_flag) {
@@ -99,38 +102,33 @@ int 
ff_parse_itu_t_t35_to_dynamic_hdr10_plus(AVDynamicHDRPlus *s, const uint8_t
 
 for (int i = 0; i < rows; i++) {
 for (int j = 0; j < cols; j++) {
-s->targeted_system_display_actual_peak_luminance[i][j] =
-(AVRational){get_bits(gb, 4), peak_luminance_den};
+s->targeted_system_display_actual_peak_luminance[i][j] = 
(AVRational) { get_bits(gb, 4), peak_luminance_den };
 }
 }
 }
 for (int w = 0; w < s->num_windows; w++) {
-AVHDRPlusColorTransformParams *params = &s->params[w];
+AVHDRPlusColorTransformParams* params = &s->params[w];
 if (get_bits_left(gb) < (3 * 17 + 17 + 4))
 return AVERROR(EINVAL);
 
 for (int i = 0; i < 3; i++) {
-params->maxscl[i] =
-(AVRational){get_bits(gb, 17), rgb_den};
+params->maxscl[i] = (AVRational) { get_bits(gb, 17), rgb_den };
 }
-

[FFmpeg-devel] [PATCH 2/2] avcodec/libx265: improve full range flag setting logic

2021-08-17 Thread Jan Ekström
Unlike libx264, libx265 does not have a separate "unspecified"/"auto"
default for color range, so we do always have to specify it.
Thus, we are required to handle the RGB case on the libavcodec
side to enable the correct value to be written out in case
of RGB content with unspecified color range being received.

In other words:
1. If the user has set color range specifically, follow that.
2. If the user has not set color range specifically, set full
   range by default in case of RGB and YUVJ pixel formats.
---
 libavcodec/libx265.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/libavcodec/libx265.c b/libavcodec/libx265.c
index 9395120471..b5c94b64a3 100644
--- a/libavcodec/libx265.c
+++ b/libavcodec/libx265.c
@@ -181,10 +181,15 @@ static av_cold int libx265_encode_init(AVCodecContext 
*avctx)
 
 ctx->params->vui.bEnableVideoSignalTypePresentFlag = 1;
 
-ctx->params->vui.bEnableVideoFullRangeFlag = avctx->pix_fmt == 
AV_PIX_FMT_YUVJ420P ||
- avctx->pix_fmt == 
AV_PIX_FMT_YUVJ422P ||
- avctx->pix_fmt == 
AV_PIX_FMT_YUVJ444P ||
- avctx->color_range == 
AVCOL_RANGE_JPEG;
+if (avctx->color_range != AVCOL_RANGE_UNSPECIFIED)
+ctx->params->vui.bEnableVideoFullRangeFlag =
+avctx->color_range == AVCOL_RANGE_JPEG;
+else
+ctx->params->vui.bEnableVideoFullRangeFlag =
+(av_pix_fmt_desc_get(avctx->pix_fmt)->flags & AV_PIX_FMT_FLAG_RGB) 
||
+avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
+avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
+avctx->pix_fmt == AV_PIX_FMT_YUVJ444P;
 
 if ((avctx->color_primaries <= AVCOL_PRI_SMPTE432 &&
  avctx->color_primaries != AVCOL_PRI_UNSPECIFIED) ||
-- 
2.31.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH 1/2] avcodec/libx264: leave full range flag unchanged if unknown

2021-08-17 Thread Jan Ekström
By default the x264 full range flag is set to -1. By not setting
it to something else, we can let libx264 handle the RGB case.
Additionally, change the preference order to user-specified range
first, and then any fall-back logic left for the YUVJ pix_fmts.

Fixes the capture part of #9374
---
 libavcodec/libx264.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index 9afaf19547..d48e142e41 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -857,10 +857,12 @@ static av_cold int X264_init(AVCodecContext *avctx)
 
 x4->params.i_slice_count  = avctx->slices;
 
-x4->params.vui.b_fullrange = avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
- avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
- avctx->pix_fmt == AV_PIX_FMT_YUVJ444P ||
- avctx->color_range == AVCOL_RANGE_JPEG;
+if (avctx->color_range != AVCOL_RANGE_UNSPECIFIED)
+x4->params.vui.b_fullrange = avctx->color_range == AVCOL_RANGE_JPEG;
+else if (avctx->pix_fmt == AV_PIX_FMT_YUVJ420P ||
+ avctx->pix_fmt == AV_PIX_FMT_YUVJ422P ||
+ avctx->pix_fmt == AV_PIX_FMT_YUVJ444P)
+x4->params.vui.b_fullrange = 1;
 
 if (avctx->colorspace != AVCOL_SPC_UNSPECIFIED)
 x4->params.vui.i_colmatrix = avctx->colorspace;
-- 
2.31.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] configure: set IceLake-AVX512 as the minimum baseline

2021-08-17 Thread Ronald S. Bultje
Hi,

On Tue, Aug 17, 2021 at 3:27 PM James Almer  wrote:

> On 8/17/2021 4:24 PM, James Almer wrote:
> > On 8/17/2021 12:25 PM, Ronald S. Bultje wrote:
> >> On Tue, Aug 17, 2021 at 2:33 AM Hendrik Leppkes 
> >> wrote:
> >>> On Tue, Aug 17, 2021 at 8:30 AM Wu Jianhua 
> wrote:
>  Based on IceLake-AVX512 and newer architecture, a broad
>  range of the subsets of AVX512 could be supported.
> >> [..]
>  -enabled avx512 && check_x86asm avx512_external "vmovdqa32
> >>> [eax]{k1}{z}, zmm0"
>  +# Only IceLake and newer architectures could enable AVX512
>  +#
> >>>
> F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
>  +enabled avx512 && check_x86asm avx512_external "vpdpwssds
> >>> zmm31{k1}{z}, zmm29, zmm28"
>    enabled avx2   && check_x86asm avx2_external   "vextracti128
> >>> xmm0, ymm0, 0"
>    enabled xop&& check_x86asm xop_external"vpmacsdd
>  xmm0,
> >>> xmm1, xmm2, xmm3"
>    enabled fma4   && check_x86asm fma4_external   "vfmaddps
>  ymm0,
> >>> ymm1, ymm2, ymm3"
> >>>
> >>> Note that you are just checking the functionality of the assembler
> >>> here, not having a runtime impact.
> >>> What you would likely want is to update avutil/x86/cpu.c as well to
> >>> only enable the AVX512 flag on those CPUs.
> >>
> >> [After IRC discussion] you want runtime checks for the
> >> variants/combinations-of-subsets that we want to support. Right now,
> >> avx512
> >> means skylake, so you may want to rename that flag to "avx512skl", and
> >> add
> >> a new runtime flag + check for the icelake subset called "avx512icl".
> >> Then
> >> in your implementations, you use the appropriate flag, and code
> >> components
> >> can individually choose to use skylake- and/or icelake-optimized avx512
> >> functions.
> [..]
>
> What about just making the existing AVX512 flag mean F+VL+DQ+BW, so
> > Skylake-X (Anything older just lacks useful instructions for
> > multimedia), and if needed for this new code add a new avx512icl flag
> > that also looks for something like GFNI.
>
> Assuming making Ice Lake the minimum supported SKU is not acceptable,
> then your suggestion is fine (Sans the renaming, since it's a breaking
> change).
>

Yes, agreed.

Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v3] avcodec/h264dec: apply H.274 film grain

2021-08-17 Thread Niklas Haas
From: Niklas Haas 

Because we need access to ref frames without film grain applied, we have
to add an extra AVFrame to H264Picture to avoid messing with the
original. This requires some amount of overhead to make the reference
moves work out, but it allows us to benefit from frame multithreading
for film grain application "for free".

Unfortunately, this approach requires twice as much RAM to be constantly
allocated for ref frames, due to the need for an extra buffer per
H264Picture. In theory, we could get away with freeing up this memory as
soon as it's no longer needed (since ref frames do not need film grain
buffers any longer), but trying to call ff_thread_release_buffer() from
output_frame() conflicts with possible later accesses to that same frame
and I'm not sure how to synchronize that well.

Tested on all three cases of (no fg), (fg present but exported) and (fg
present and not exported), with and without threading.

Signed-off-by: Niklas Haas 
---
 libavcodec/h264_picture.c | 35 +++--
 libavcodec/h264_slice.c   | 16 ++--
 libavcodec/h264dec.c  | 55 ++-
 libavcodec/h264dec.h  |  6 +
 4 files changed, 90 insertions(+), 22 deletions(-)

diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index ff30166b4d..5944798394 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -43,13 +43,14 @@
 
 void ff_h264_unref_picture(H264Context *h, H264Picture *pic)
 {
-int off = offsetof(H264Picture, tf) + sizeof(pic->tf);
+int off = offsetof(H264Picture, tf_grain) + sizeof(pic->tf_grain);
 int i;
 
 if (!pic->f || !pic->f->buf[0])
 return;
 
 ff_thread_release_buffer(h->avctx, &pic->tf);
+ff_thread_release_buffer(h->avctx, &pic->tf_grain);
 av_buffer_unref(&pic->hwaccel_priv_buf);
 
 av_buffer_unref(&pic->qscale_table_buf);
@@ -93,6 +94,7 @@ static void h264_copy_picture_params(H264Picture *dst, const 
H264Picture *src)
 dst->mb_width  = src->mb_width;
 dst->mb_height = src->mb_height;
 dst->mb_stride = src->mb_stride;
+dst->needs_fg  = src->needs_fg;
 }
 
 int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src)
@@ -108,6 +110,14 @@ int ff_h264_ref_picture(H264Context *h, H264Picture *dst, 
H264Picture *src)
 if (ret < 0)
 goto fail;
 
+if (src->needs_fg) {
+av_assert0(src->tf_grain.f == src->f_grain);
+dst->tf_grain.f = dst->f_grain;
+ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain);
+if (ret < 0)
+goto fail;
+}
+
 dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf);
 dst->mb_type_buf  = av_buffer_ref(src->mb_type_buf);
 dst->pps_buf  = av_buffer_ref(src->pps_buf);
@@ -159,6 +169,15 @@ int ff_h264_replace_picture(H264Context *h, H264Picture 
*dst, const H264Picture
 if (ret < 0)
 goto fail;
 
+if (src->needs_fg) {
+av_assert0(src->tf_grain.f == src->f_grain);
+dst->tf_grain.f = dst->f_grain;
+ff_thread_release_buffer(h->avctx, &dst->tf_grain);
+ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain);
+if (ret < 0)
+goto fail;
+}
+
 ret  = av_buffer_replace(&dst->qscale_table_buf, src->qscale_table_buf);
 ret |= av_buffer_replace(&dst->mb_type_buf, src->mb_type_buf);
 ret |= av_buffer_replace(&dst->pps_buf, src->pps_buf);
@@ -212,6 +231,7 @@ void ff_h264_set_erpic(ERPicture *dst, H264Picture *src)
 int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup)
 {
 AVCodecContext *const avctx = h->avctx;
+H264Picture *cur = h->cur_pic_ptr;
 int err = 0;
 h->mb_y = 0;
 
@@ -230,10 +250,21 @@ int ff_h264_field_end(H264Context *h, H264SliceContext 
*sl, int in_setup)
 if (err < 0)
 av_log(avctx, AV_LOG_ERROR,
"hardware accelerator failed to decode picture\n");
+} else if (!in_setup && cur->needs_fg) {
+AVFrameSideData *sd = av_frame_get_side_data(cur->f, 
AV_FRAME_DATA_FILM_GRAIN_PARAMS);
+av_assert0(sd); // always present if `cur->needs_fg`
+err = ff_h274_apply_film_grain(cur->f_grain, cur->f, &h->h274db,
+   (AVFilmGrainParams *) sd->data);
+if (err < 0) {
+av_log(h->avctx, AV_LOG_WARNING, "Failed synthesizing film "
+   "grain, ignoring: %s\n", av_err2str(err));
+cur->needs_fg = 0;
+err = 0;
+}
 }
 
 if (!in_setup && !h->droppable)
-ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
+ff_thread_report_progress(&cur->tf, INT_MAX,
   h->picture_structure == PICT_BOTTOM_FIELD);
 emms_c();
 
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 9244d2d5dd..98ca8836db 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -197,6 +197,16 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
 if (ret < 

Re: [FFmpeg-devel] [PATCH v2 3/3] avcodec/h264dec: apply H.274 film grain

2021-08-17 Thread Niklas Haas
Oops, missing av_frame_unref() in the decoder uninit. Will fix.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] configure: set IceLake-AVX512 as the minimum baseline

2021-08-17 Thread James Almer

On 8/17/2021 4:24 PM, James Almer wrote:

On 8/17/2021 12:25 PM, Ronald S. Bultje wrote:

Hi,

On Tue, Aug 17, 2021 at 2:33 AM Hendrik Leppkes  
wrote:



On Tue, Aug 17, 2021 at 8:30 AM Wu Jianhua  wrote:

Based on IceLake-AVX512 and newer architecture, a broad
range of the subsets of AVX512 could be supported.



[..]


-    enabled avx512 && check_x86asm avx512_external "vmovdqa32

[eax]{k1}{z}, zmm0"

+    # Only IceLake and newer architectures could enable AVX512
+    #

F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ

+    enabled avx512 && check_x86asm avx512_external "vpdpwssds

zmm31{k1}{z}, zmm29, zmm28"

  enabled avx2   && check_x86asm avx2_external   "vextracti128

xmm0, ymm0, 0"
  enabled xop    && check_x86asm xop_external    "vpmacsdd 
xmm0,

xmm1, xmm2, xmm3"
  enabled fma4   && check_x86asm fma4_external   "vfmaddps 
ymm0,

ymm1, ymm2, ymm3"

Note that you are just checking the functionality of the assembler
here, not having a runtime impact.
What you would likely want is to update avutil/x86/cpu.c as well to
only enable the AVX512 flag on those CPUs.



[After IRC discussion] you want runtime checks for the
variants/combinations-of-subsets that we want to support. Right now, 
avx512
means skylake, so you may want to rename that flag to "avx512skl", and 
add
a new runtime flag + check for the icelake subset called "avx512icl". 
Then
in your implementations, you use the appropriate flag, and code 
components

can individually choose to use skylake- and/or icelake-optimized avx512
functions.


Does it really mean Skylake-X? Afaik the flag checks in cpu.c currently 
look for AVX-512 Foundation and ZMM support, so it means Knights Landing 
or newer.


Nevermind, just noticed the comment in cpu.c that mentions the Skylake-X 
extensions.




What about just making the existing AVX512 flag mean F+VL+DQ+BW, so 
Skylake-X (Anything older just lacks useful instructions for 
multimedia), and if needed for this new code add a new avx512icl flag 
that also looks for something like GFNI.


Assuming making Ice Lake the minimum supported SKU is not acceptable, 
then your suggestion is fine (Sans the renaming, since it's a breaking 
change).






Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".





___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH v2 3/3] avcodec/h264dec: apply H.274 film grain

2021-08-17 Thread Niklas Haas
From: Niklas Haas 

Because we need access to ref frames without film grain applied, we have
to add an extra AVFrame to H264Picture to avoid messing with the
original. This requires some amount of overhead to make the reference
moves work out, but it allows us to benefit from frame multithreading
for film grain application "for free".

Unfortunately, this approach requires twice as much RAM to be constantly
allocated for ref frames, due to the need for an extra buffer per
H264Picture. In theory, we could get away with freeing up this memory as
soon as it's no longer needed (since ref frames do not need film grain
buffers any longer), but trying to call ff_thread_release_buffer() from
output_frame() conflicts with possible later accesses to that same frame
and I'm not sure how to synchronize that well.

Tested on all three cases of (no fg), (fg present but exported) and (fg
present and not exported), with and without threading.

Signed-off-by: Niklas Haas 
---
 libavcodec/h264_picture.c | 35 +--
 libavcodec/h264_slice.c   | 16 ++--
 libavcodec/h264dec.c  | 39 +++
 libavcodec/h264dec.h  |  6 ++
 4 files changed, 80 insertions(+), 16 deletions(-)

diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index ff30166b4d..5944798394 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -43,13 +43,14 @@
 
 void ff_h264_unref_picture(H264Context *h, H264Picture *pic)
 {
-int off = offsetof(H264Picture, tf) + sizeof(pic->tf);
+int off = offsetof(H264Picture, tf_grain) + sizeof(pic->tf_grain);
 int i;
 
 if (!pic->f || !pic->f->buf[0])
 return;
 
 ff_thread_release_buffer(h->avctx, &pic->tf);
+ff_thread_release_buffer(h->avctx, &pic->tf_grain);
 av_buffer_unref(&pic->hwaccel_priv_buf);
 
 av_buffer_unref(&pic->qscale_table_buf);
@@ -93,6 +94,7 @@ static void h264_copy_picture_params(H264Picture *dst, const 
H264Picture *src)
 dst->mb_width  = src->mb_width;
 dst->mb_height = src->mb_height;
 dst->mb_stride = src->mb_stride;
+dst->needs_fg  = src->needs_fg;
 }
 
 int ff_h264_ref_picture(H264Context *h, H264Picture *dst, H264Picture *src)
@@ -108,6 +110,14 @@ int ff_h264_ref_picture(H264Context *h, H264Picture *dst, 
H264Picture *src)
 if (ret < 0)
 goto fail;
 
+if (src->needs_fg) {
+av_assert0(src->tf_grain.f == src->f_grain);
+dst->tf_grain.f = dst->f_grain;
+ret = ff_thread_ref_frame(&dst->tf_grain, &src->tf_grain);
+if (ret < 0)
+goto fail;
+}
+
 dst->qscale_table_buf = av_buffer_ref(src->qscale_table_buf);
 dst->mb_type_buf  = av_buffer_ref(src->mb_type_buf);
 dst->pps_buf  = av_buffer_ref(src->pps_buf);
@@ -159,6 +169,15 @@ int ff_h264_replace_picture(H264Context *h, H264Picture 
*dst, const H264Picture
 if (ret < 0)
 goto fail;
 
+if (src->needs_fg) {
+av_assert0(src->tf_grain.f == src->f_grain);
+dst->tf_grain.f = dst->f_grain;
+ff_thread_release_buffer(h->avctx, >tf_grain);
+ret = ff_thread_ref_frame(>tf_grain, >tf_grain);
+if (ret < 0)
+goto fail;
+}
+
 ret  = av_buffer_replace(>qscale_table_buf, src->qscale_table_buf);
 ret |= av_buffer_replace(>mb_type_buf, src->mb_type_buf);
 ret |= av_buffer_replace(>pps_buf, src->pps_buf);
@@ -212,6 +231,7 @@ void ff_h264_set_erpic(ERPicture *dst, H264Picture *src)
 int ff_h264_field_end(H264Context *h, H264SliceContext *sl, int in_setup)
 {
 AVCodecContext *const avctx = h->avctx;
+H264Picture *cur = h->cur_pic_ptr;
 int err = 0;
 h->mb_y = 0;
 
@@ -230,10 +250,21 @@ int ff_h264_field_end(H264Context *h, H264SliceContext 
*sl, int in_setup)
 if (err < 0)
 av_log(avctx, AV_LOG_ERROR,
"hardware accelerator failed to decode picture\n");
+} else if (!in_setup && cur->needs_fg) {
+AVFrameSideData *sd = av_frame_get_side_data(cur->f, 
AV_FRAME_DATA_FILM_GRAIN_PARAMS);
+av_assert0(sd); // always present if `cur->needs_fg`
+err = ff_h274_apply_film_grain(cur->f_grain, cur->f, >h274db,
+   (AVFilmGrainParams *) sd->data);
+if (err < 0) {
+av_log(h->avctx, AV_LOG_WARNING, "Failed synthesizing film "
+   "grain, ignoring: %s\n", av_err2str(err));
+cur->needs_fg = 0;
+err = 0;
+}
 }
 
 if (!in_setup && !h->droppable)
-ff_thread_report_progress(>cur_pic_ptr->tf, INT_MAX,
+ff_thread_report_progress(>tf, INT_MAX,
   h->picture_structure == PICT_BOTTOM_FIELD);
 emms_c();
 
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 9244d2d5dd..98ca8836db 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -197,6 +197,16 @@ static int alloc_picture(H264Context *h, H264Picture *pic)
 

[FFmpeg-devel] [PATCH v2 2/3] avcodec/h274: add film grain synthesis routine

2021-08-17 Thread Niklas Haas
From: Niklas Haas 

This could arguably also be a vf, but I decided to put it here since
decoders are technically required to apply film grain during the output
step, and I would rather want to avoid requiring users insert the
correct film grain synthesis filter on their own.

The code, while in C, is written in a way that unrolls/vectorizes fairly
well under -O3, and is reasonably cache friendly. On my CPU, a single
thread pushes about 400 FPS at 1080p.

Apart from hand-written assembly, one possible avenue of improvement
would be to change the access order to compute the grain row-by-row
rather than in 8x8 blocks. This requires some redundant PRNG calls, but
would make the algorithm more cache-oblivious.

The implementation has been written to the wording of SMPTE RDD 5-2006
as faithfully as I can manage. However, apart from passing a visual
inspection, no guarantee of correctness can be made due to the lack of
any publicly available reference implementation against which to
compare it.

Signed-off-by: Niklas Haas 
---
 libavcodec/Makefile |   1 +
 libavcodec/h274.c   | 811 
 libavcodec/h274.h   |  52 +++
 3 files changed, 864 insertions(+)
 create mode 100644 libavcodec/h274.c
 create mode 100644 libavcodec/h274.h

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 9a6adb9903..21739b4064 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -42,6 +42,7 @@ OBJS = ac3_parser.o   
  \
dirac.o  \
dv_profile.o \
encode.o \
+   h274.o   \
imgconvert.o \
jni.o\
mathtables.o \
diff --git a/libavcodec/h274.c b/libavcodec/h274.c
new file mode 100644
index 00..0efc00ca1d
--- /dev/null
+++ b/libavcodec/h274.c
@@ -0,0 +1,811 @@
+/*
+ * H.274 film grain synthesis
+ * Copyright (c) 2021 Niklas Haas 
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * H.274 film grain synthesis.
+ * @author Niklas Haas 
+ */
+
+#include "libavutil/avassert.h"
+#include "libavutil/imgutils.h"
+
+#include "h274.h"
+
+// The code in this file has a lot of loops that vectorize very well, this is
+// about a 40% speedup for no obvious downside.
+#pragma GCC optimize("tree-vectorize")
+
+static const int8_t Gaussian_LUT[2048+256];
+static const uint32_t Seed_LUT[256];
+static const int8_t R64T[64][64];
+
+static void prng_shift(uint32_t *state)
+{
+// Primitive polynomial x^31 + x^3 + 1 (modulo 2)
+uint32_t x = *state;
+uint8_t feedback = (x >> 2) ^ (x >> 30);
+*state = (x << 1) | (feedback & 1u);
+}
+
+static void init_slice_c(int8_t out[64][64], uint8_t h, uint8_t v,
+ int16_t tmp[64][64])
+{
+static const uint8_t deblock_factors[13] = {
+64, 71, 77, 84, 90, 96, 103, 109, 116, 122, 128, 128, 128
+};
+
+const uint8_t deblock_coeff = deblock_factors[v];
+const uint8_t freq_h = ((h + 3) << 2) - 1;
+const uint8_t freq_v = ((v + 3) << 2) - 1;
+uint32_t seed = Seed_LUT[h + v * 13];
+
+// Initialize with random gaussian values, using the output array as a
+// temporary buffer for these intermediate values.
+//
+// Note: To make the subsequent matrix multiplication cache friendlier, we
+// store each *column* of the starting image in a *row* of `out`
+for (int y = 0; y <= freq_v; y++) {
+for (int x = 0; x <= freq_h; x += 4) {
+uint16_t offset = seed % 2048;
+out[x + 0][y] = Gaussian_LUT[offset + 0];
+out[x + 1][y] = Gaussian_LUT[offset + 1];
+out[x + 2][y] = Gaussian_LUT[offset + 2];
+out[x + 3][y] = Gaussian_LUT[offset + 3];
+prng_shift();
+}
+}
+
+out[0][0] = 0;
+
+// 64x64 inverse integer transform
+for (int y = 0; y < 64; y++) {
+for (int x = 0; x < 64; x++) {

[FFmpeg-devel] [PATCH v2 1/3] avcodec/h264_slice: compute and export film grain seed

2021-08-17 Thread Niklas Haas
From: Niklas Haas 

From SMPTE RDD 5-2006, the grain seed is to be computed from the
following definition of `pic_offset`:

> When decoding H.264 | MPEG-4 AVC bitstreams, pic_offset is defined as
> follows:
>   - pic_offset = PicOrderCnt(CurrPic) + (PicOrderCnt_offset << 5)
> where:
>   - PicOrderCnt(CurrPic) is the picture order count of the current frame,
> which shall be derived from [the video stream].
>
>   - PicOrderCnt_offset is set to idr_pic_id on IDR frames. idr_pic_id
> shall be read from the slice header of [the video stream]. On non-IDR I
> frames, PicOrderCnt_offset is set to 0. A frame shall be classified as I
> frame when all its slices are I slices, which may be optionally
> designated by setting primary_pic_type to 0 in the access delimiter NAL
> unit. Otherwise, PicOrderCnt_offset is not changed. PicOrderCnt_offset is
> updated in decoding order.

Co-authored-by: James Almer 
Signed-off-by: Niklas Haas 
---
 libavcodec/h264_slice.c   |  9 -
 libavcodec/h264dec.c  | 14 ++
 libavcodec/h264dec.h  |  7 +++
 libavutil/film_grain_params.h |  3 +++
 4 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 0d7107d455..9244d2d5dd 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -406,6 +406,7 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
 
 h->next_output_pic   = h1->next_output_pic;
 h->next_outputed_poc = h1->next_outputed_poc;
+h->poc_offset= h1->poc_offset;
 
 memcpy(h->mmco, h1->mmco, sizeof(h->mmco));
 h->nb_mmco = h1->nb_mmco;
@@ -1335,6 +1336,7 @@ static int h264_export_frame_props(H264Context *h)
 return AVERROR(ENOMEM);
 
 fgp->type = AV_FILM_GRAIN_PARAMS_H274;
+fgp->seed = cur->poc + (h->poc_offset << 5);
 
 fgp->codec.h274.model_id = fgc->model_id;
 if (fgc->separate_colour_description_present_flag) {
@@ -1543,6 +1545,11 @@ static int h264_field_start(H264Context *h, const 
H264SliceContext *sl,
 h->poc.delta_poc[0] = sl->delta_poc[0];
 h->poc.delta_poc[1] = sl->delta_poc[1];
 
+if (nal->type == H264_NAL_IDR_SLICE)
+h->poc_offset = sl->idr_pic_id;
+else if (h->picture_intra_only)
+h->poc_offset = 0;
+
 /* Shorten frame num gaps so we don't have to allocate reference
  * frames just to throw them away */
 if (h->poc.frame_num != h->poc.prev_frame_num) {
@@ -1891,7 +1898,7 @@ static int h264_slice_header_parse(const H264Context *h, 
H264SliceContext *sl,
 }
 
 if (nal->type == H264_NAL_IDR_SLICE)
-get_ue_golomb_long(>gb); /* idr_pic_id */
+sl->idr_pic_id = get_ue_golomb_long(>gb);
 
 sl->poc_lsb = 0;
 sl->delta_poc_bottom = 0;
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 38f8967265..dc99ee995e 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -485,6 +485,8 @@ static void h264_decode_flush(AVCodecContext *avctx)
 static int get_last_needed_nal(H264Context *h)
 {
 int nals_needed = 0;
+int slice_type = 0;
+int picture_intra_only = 1;
 int first_slice = 0;
 int i, ret;
 
@@ -516,11 +518,23 @@ static int get_last_needed_nal(H264Context *h)
 !first_slice ||
 first_slice != nal->type)
 nals_needed = i;
+slice_type = get_ue_golomb_31();
+if (slice_type > 9) {
+if (h->avctx->err_recognition & AV_EF_EXPLODE)
+return AVERROR_INVALIDDATA;
+}
+if (slice_type > 4)
+slice_type -= 5;
+
+slice_type = ff_h264_golomb_to_pict_type[slice_type];
+picture_intra_only &= (slice_type & 3) == AV_PICTURE_TYPE_I;
 if (!first_slice)
 first_slice = nal->type;
 }
 }
 
+h->picture_intra_only = picture_intra_only;
+
 return nals_needed;
 }
 
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 125966aa04..7c419de051 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -331,6 +331,7 @@ typedef struct H264SliceContext {
 int explicit_ref_marking;
 
 int frame_num;
+int idr_pic_id;
 int poc_lsb;
 int delta_poc_bottom;
 int delta_poc[2];
@@ -384,6 +385,11 @@ typedef struct H264Context {
  */
 int picture_idr;
 
+/*
+ * Set to 1 when the current picture contains only I slices, 0 otherwise.
+ */
+int picture_intra_only;
+
 int crop_left;
 int crop_right;
 int crop_top;
@@ -473,6 +479,7 @@ typedef struct H264Context {
 int last_pocs[MAX_DELAYED_PIC_COUNT];
 H264Picture *next_output_pic;
 int next_outputed_poc;
+int poc_offset; ///< PicOrderCnt_offset from SMPTE RDD-2006
 
 /**
  * memory management control operations buffer.
diff --git a/libavutil/film_grain_params.h b/libavutil/film_grain_params.h
index 

Re: [FFmpeg-devel] [PATCH 4/4] avcodec/h264dec: apply H.274 film grain

2021-08-17 Thread Niklas Haas
On Sun, 15 Aug 2021 19:11:42 +0200 Michael Niedermayer  
wrote:
> On Sat, Aug 14, 2021 at 01:36:20PM +0200, Niklas Haas wrote:
> > From: Niklas Haas 
> > 
> > Because we need access to ref frames without film grain applied, we have
> > to add an extra AVFrame to H264Picture to avoid messing with the
> > original. This requires some amount of overhead to make the reference
> > moves work out, but it allows us to benefit from frame multithreading
> > for film grain application "for free".
> > 
> > Unfortunately, this approach requires twice as much RAM to be constantly
> > allocated, due to the need for an extra buffer per H264Picture. In
> > theory, we could get away with freeing up this memory as soon as it's no
> > longer needed, but trying to call ff_thread_release_buffer() from
> > output_frame() simply deadlocks the decoder and I haven't figured out
> > why. Patches welcome(tm)
> > 
> > Tested on all three cases of (no fg), (fg present but exported) and (fg
> > present and not exported), with and without threading.
> > ---
> >  libavcodec/h264_picture.c | 24 +++-
> >  libavcodec/h264_slice.c   | 18 +--
> >  libavcodec/h264dec.c  | 48 +++
> >  libavcodec/h264dec.h  |  6 +
> >  4 files changed, 83 insertions(+), 13 deletions(-)
> 
> [...]
> > @@ -826,6 +836,21 @@ static int output_frame(H264Context *h, AVFrame *dst, 
> > H264Picture *srcp)
> >  AVFrame *src = srcp->f;
> >  int ret;
> >  
> > +if (srcp->needs_fg) {
> 
> > +AVFrameSideData *sd = av_frame_get_side_data(src, 
> > AV_FRAME_DATA_FILM_GRAIN_PARAMS);
> > +av_assert0(sd);
> 
> Assertion is not correct to check for failure
> 
> This would kill the process with the lib and app

This is not a failure check. The point is that it should be impossible
for `srcp->needs_fg` to be true but the side data to be absent. The
assert is just there to communicate/enforce this.

I've added an extra comment in v2.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] configure: set IceLake-AVX512 as the minimum baseline

2021-08-17 Thread James Almer

On 8/17/2021 12:25 PM, Ronald S. Bultje wrote:

Hi,

On Tue, Aug 17, 2021 at 2:33 AM Hendrik Leppkes  wrote:


On Tue, Aug 17, 2021 at 8:30 AM Wu Jianhua  wrote:

Based on IceLake-AVX512 and newer architecture, a broad
range of the subsets of AVX512 could be supported.



[..]


-enabled avx512 && check_x86asm avx512_external "vmovdqa32

[eax]{k1}{z}, zmm0"

+# Only IceLake and newer architectures could enable AVX512
+#

F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ

+enabled avx512 && check_x86asm avx512_external "vpdpwssds

zmm31{k1}{z}, zmm29, zmm28"

  enabled avx2   && check_x86asm avx2_external   "vextracti128

xmm0, ymm0, 0"

  enabled xop&& check_x86asm xop_external"vpmacsdd xmm0,

xmm1, xmm2, xmm3"

  enabled fma4   && check_x86asm fma4_external   "vfmaddps ymm0,

ymm1, ymm2, ymm3"

Note that you are just checking the functionality of the assembler
here, not having a runtime impact.
What you would likely want is to update avutil/x86/cpu.c as well to
only enable the AVX512 flag on those CPUs.



[After IRC discussion] you want runtime checks for the
variants/combinations-of-subsets that we want to support. Right now, avx512
means skylake, so you may want to rename that flag to "avx512skl", and add
a new runtime flag + check for the icelake subset called "avx512icl". Then
in your implementations, you use the appropriate flag, and code components
can individually choose to use skylake- and/or icelake-optimized avx512
functions.


Does it really mean Skylake-X? Afaik the flag checks in cpu.c currently 
look for AVX-512 Foundation and ZMM support, so it means Knights Landing 
or newer.


What about just making the existing AVX512 flag mean F+VL+DQ+BW, so 
Skylake-X (Anything older just lacks useful instructions for 
multimedia), and if needed for this new code add a new avx512icl flag 
that also looks for something like GFNI.




Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avcodec: add SMC encoder

2021-08-17 Thread Andreas Rheinhardt
Paul B Mahol:
> Signed-off-by: Paul B Mahol 
> ---
> +bytestream2_init_writer(>pb, pkt->data, pkt->size);
> +
> +bytestream2_put_be32(>pb, 0x00);
> +
> +pal = av_packet_new_side_data(pkt, AV_PKT_DATA_PALETTE, AVPALETTE_SIZE);

Missing check for allocation failure.

> +memcpy(pal, frame->data[1], AVPALETTE_SIZE);
> +
> +smc_encode_stream(s, pict);
> +
> +av_shrink_packet(pkt, bytestream2_tell_p(>pb));
> +
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] avcodec: add SMC encoder

2021-08-17 Thread Paul B Mahol
will apply soon.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [WIP] Event loop

2021-08-17 Thread ffmpegandmahanstreamer
August 17, 2021 3:58 AM, "Nicolas George"  wrote:

> Xiang Xiao (12021-08-17):
> 
>> Nicolas, do you have any more progress? I am very interested in your
>> proposal and want to test your change in our special device.
> 
> Sorry. I have been focusing on other projects while looking for a good
> way to avoid dynamic allocations in the thread-aware scheduler.
> 
> Since I could not find one, I will try to start again on this soon.
> 
> Do you have an opinion on the low-level single-thread API?

Not him, obviously, but I think the API idea is great.
> 
> Can you share some details about the needs your special device? I would
> consider them when writing the API.
> 
> Regards,
> 
> --
> Nicolas George
> 
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] get_cabac_inline_x86: Don't inline if 32-bit clang on windows

2021-08-17 Thread Martin Storsjö

On Tue, 17 Aug 2021, James Almer wrote:


On 8/17/2021 12:35 PM, Christopher Degawa wrote:

Fixes https://trac.ffmpeg.org/ticket/8903

relevant https://github.com/msys2/MINGW-packages/discussions/9258

Signed-off-by: Christopher Degawa 
---
  libavcodec/x86/cabac.h | 9 +++--
  1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 53d74c541e..b046a56a6b 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -177,8 +177,13 @@
#if HAVE_7REGS && !BROKEN_COMPILER
  #define get_cabac_inline get_cabac_inline_x86
-static av_always_inline int get_cabac_inline_x86(CABACContext *c,
- uint8_t *const state)
+static
+#if defined(_WIN32) && !defined(_WIN64) && defined(__clang__)


Can you do some benchmarks to see how not inlining this compares to 
simply disabling this code for this target? Because it sounds like you 
may want to add this case to the BROKEN_COMPILER macro, and not use this 
code at all.


FWIW, my patch for this issue last year was exactly to add this config 
combo to the broken compiler case - but I believe this is better. 
Benchmarks are of course always best. It'd be interesting also to measure 
the impact of not inlining this in a configuration where it actually 
works as intended.


// Martin

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] get_cabac_inline_x86: Don't inline if 32-bit clang on windows

2021-08-17 Thread Martin Storsjö

On Tue, 17 Aug 2021, Christopher Degawa wrote:


Fixes https://trac.ffmpeg.org/ticket/8903

relevant https://github.com/msys2/MINGW-packages/discussions/9258

Signed-off-by: Christopher Degawa 
---
libavcodec/x86/cabac.h | 9 +++--
1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 53d74c541e..b046a56a6b 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -177,8 +177,13 @@

#if HAVE_7REGS && !BROKEN_COMPILER
#define get_cabac_inline get_cabac_inline_x86
-static av_always_inline int get_cabac_inline_x86(CABACContext *c,
- uint8_t *const state)
+static
+#if defined(_WIN32) && !defined(_WIN64) && defined(__clang__)
+av_noinline
+#else
+av_always_inline
+#endif
+int get_cabac_inline_x86(CABACContext *c, uint8_t *const state)
{
int bit, tmp;
#ifdef BROKEN_RELOCATIONS
--
2.32.0


This looks good to me, and is a less intrusive fix for the issue than the 
one I submitted last year.


FWIW, the issue is avoided in some configurations by configuring with 
--cpu=i686, which disallows use of inline MMX/SSE like this, but with this 
fix one can keep all the asm enabled.


// Martin

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] get_cabac_inline_x86: Don't inline if 32-bit clang on windows

2021-08-17 Thread James Almer

On 8/17/2021 12:35 PM, Christopher Degawa wrote:

Fixes https://trac.ffmpeg.org/ticket/8903

relevant https://github.com/msys2/MINGW-packages/discussions/9258

Signed-off-by: Christopher Degawa 
---
  libavcodec/x86/cabac.h | 9 +++--
  1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 53d74c541e..b046a56a6b 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -177,8 +177,13 @@
  
  #if HAVE_7REGS && !BROKEN_COMPILER

  #define get_cabac_inline get_cabac_inline_x86
-static av_always_inline int get_cabac_inline_x86(CABACContext *c,
- uint8_t *const state)
+static
+#if defined(_WIN32) && !defined(_WIN64) && defined(__clang__)


Can you do some benchmarks to see how not inlining this compares to 
simply disabling this code for this target? Because it sounds like you 
may want to add this case to the BROKEN_COMPILER macro, and not use this 
code at all.



+av_noinline
+#else
+av_always_inline
+#endif
+int get_cabac_inline_x86(CABACContext *c, uint8_t *const state)
  {
  int bit, tmp;
  #ifdef BROKEN_RELOCATIONS



___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] get_cabac_inline_x86: Don't inline if 32-bit clang on windows

2021-08-17 Thread Christopher Degawa
Fixes https://trac.ffmpeg.org/ticket/8903

relevant https://github.com/msys2/MINGW-packages/discussions/9258

Signed-off-by: Christopher Degawa 
---
 libavcodec/x86/cabac.h | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index 53d74c541e..b046a56a6b 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -177,8 +177,13 @@
 
 #if HAVE_7REGS && !BROKEN_COMPILER
 #define get_cabac_inline get_cabac_inline_x86
-static av_always_inline int get_cabac_inline_x86(CABACContext *c,
- uint8_t *const state)
+static
+#if defined(_WIN32) && !defined(_WIN64) && defined(__clang__)
+av_noinline
+#else
+av_always_inline
+#endif
+int get_cabac_inline_x86(CABACContext *c, uint8_t *const state)
 {
 int bit, tmp;
 #ifdef BROKEN_RELOCATIONS
-- 
2.32.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 4/6] avfilter/avfilter: Allow to free non-static pads generically

2021-08-17 Thread Andreas Rheinhardt
Nicolas George:
> Andreas Rheinhardt (12021-08-17):
>> This can be enabled/disabled on a per-filter basis by setting
>> the new internal flags FF_FILTER_FLAG_FREE_(IN|OUT)PADS and
>> on a per-pad basis by setting the AVFILTERPAD_FLAG_FREE_NAME flag.
>>
>> Signed-off-by: Andreas Rheinhardt 
>> ---
>> I decided to combine both approaches: It has the advantage that
>> the marginal extra code for a filter all of whose inputs'/outputs'
>> names need to be freed is zero while making it easy to handle filters
>> that have some inputs/outputs whose names need to be freed.
> 
> It had the drawback that the information about the nature of the pads
> ends up at two places of the code that are not close to each other. It
> is not good for maintenance and readability.
> 
> I think a boolean flag to ff_append_...pad() would do the job much more
> elegantly. What do you think about it?
> 
Well, this has the problem that it adds code in many places, whereas
static flags for the filter just need code in two places (with zero
marginal cost for making another filter use this feature, whereas a flag
to ff_append_... has a nonzero cost even to users not making use of this
feature). I also like that it uses a static flag for what is essentially
a static property.

- Andreas
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] configure: set IceLake-AVX512 as the minimum baseline

2021-08-17 Thread Ronald S. Bultje
Hi,

On Tue, Aug 17, 2021 at 2:33 AM Hendrik Leppkes  wrote:

> On Tue, Aug 17, 2021 at 8:30 AM Wu Jianhua  wrote:
> > Based on IceLake-AVX512 and newer architecture, a broad
> > range of the subsets of AVX512 could be supported.
>
[..]

> > -enabled avx512 && check_x86asm avx512_external "vmovdqa32
> [eax]{k1}{z}, zmm0"
> > +# Only IceLake and newer architectures could enable AVX512
> > +#
> F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
> > +enabled avx512 && check_x86asm avx512_external "vpdpwssds
> zmm31{k1}{z}, zmm29, zmm28"
> >  enabled avx2   && check_x86asm avx2_external   "vextracti128
> xmm0, ymm0, 0"
> >  enabled xop&& check_x86asm xop_external"vpmacsdd xmm0,
> xmm1, xmm2, xmm3"
> >  enabled fma4   && check_x86asm fma4_external   "vfmaddps ymm0,
> ymm1, ymm2, ymm3"
>
> Note that you are just checking the functionality of the assembler
> here, not having a runtime impact.
> What you would likely want is to update avutil/x86/cpu.c as well to
> only enable the AVX512 flag on those CPUs.
>

[After IRC discussion] you want runtime checks for the
variants/combinations-of-subsets that we want to support. Right now, avx512
means skylake, so you may want to rename that flag to "avx512skl", and add
a new runtime flag + check for the icelake subset called "avx512icl". Then
in your implementations, you use the appropriate flag, and code components
can individually choose to use skylake- and/or icelake-optimized avx512
functions.

Ronald
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [WIP] Event loop

2021-08-17 Thread Xiang Xiao
On Tue, Aug 17, 2021 at 3:58 PM Nicolas George  wrote:

> Xiang Xiao (12021-08-17):
> > Nicolas, do you have any more progress? I am very interested in your
> > proposal and want to test your change in our special device.
>
>
We are building an audio framework on top of FFmpeg for a wearable device:

   1. Reuse avfilter to form the process pipeline
   2. Add the new avfilter to process our special audio/a2dp/sco device
   3. Add audio routing policy through
   https://github.com/intel/parameter-framework
   4. Expose a bunch of audio play, capture and routing API to client by
   RPC

Since the wearable device has very limited resources, we want to run all
processes in one loop thread. It's easy to achieve by poll/select:

   1. Mark output/input device file handle to non-block
   2. Mark RPC socket handle to non-block
   3. Poll the above handles in the main loop
   4. Call the filter/graph function to trigger the audio/route action


Sorry. I have been focusing on other projects while looking for a good
> way to avoid dynamic allocations in the thread-aware scheduler.
>
> Since I could not find one, I will try to start again on this soon.
>
> Do you have an opinion on the low-level single-thread API?
>
>
Yes, as you can see, our approach is a single-thread solution. But there is
a major blocking issue: protocol/avformat can't handle non-blocking I/O
correctly. Two approaches I can think of:

   1. Handle the partial read (or -EAGAIN) correctly by saving
   the intermediate result into protocol/avformat context
   2. Protocol/avformat utilize your event loop to wait for the new data
   instead of blocking on recv call, the same loop can replace the one we used
   in the main thread.

 The first approach requires reviewing and adapting each protocol/avformat
carefully, which is a huge amount of work. The second approach is
simpler.

Can you share some details about the needs your special device? I would
> consider them when writing the API.
>
> Regards,
>
> --
>   Nicolas George
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] ffmpeg_hw: Don't ignore key parameters when initializing a hw device

2021-08-17 Thread James Almer

On 8/11/2021 4:42 PM, Soft Works wrote:




-Original Message-
From: ffmpeg-devel  On Behalf Of
Haihao Xiang
Sent: Wednesday, 11 August 2021 08:44
To: ffmpeg-devel@ffmpeg.org
Cc: Haihao Xiang 
Subject: [FFmpeg-devel] [PATCH] ffmpeg_hw: Don't ignore key
parameters when initializing a hw device

Currently user may use '-init_hw_device type=name' to initialize a hw
device, however the key parameter is ignored when use '-
init_hw_device
type=name,key=value'. After applying this patch, user may set key
parameter if needed.
---
  fftools/ffmpeg_hw.c | 16 +++-
  1 file changed, 15 insertions(+), 1 deletion(-)



This makes sense as it allows to further simplify hw initialization
command lines.

As an example, you can write

-init_hw_device qsv=qd,child_device=1

Instead of

-init_hw_device qsv=qd:hw_any,child_device=1

So besides the former being shorter, it also saves the user from
needing to remember 'hw_any' (or hw, hw2, hw3, hw4 matching the
child_device param on Windows).

LGTM.

softworkz


Pushed, thanks.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2] lavc/aarch64: add pred functions for 10-bit

2021-08-17 Thread Martin Storsjö

On Mon, 16 Aug 2021, Mikhail Nitenko wrote:


Benchmarks:A53 A72
pred8x8_dc_10_c:   64.255.7
pred8x8_dc_10_neon:61.753.7
pred8x8_dc_128_10_c:   26.020.7
pred8x8_dc_128_10_neon:30.724.5
pred8x8_horizontal_10_c:   60.035.2
pred8x8_horizontal_10_neon:38.033.0
pred8x8_left_dc_10_c:  42.535.5
pred8x8_left_dc_10_neon:   50.741.5
pred8x8_mad_cow_dc_0l0_10_c:   55.744.7
pred8x8_mad_cow_dc_0l0_10_neon:47.537.2
pred8x8_mad_cow_dc_0lt_10_c:   89.275.5
pred8x8_mad_cow_dc_0lt_10_neon:52.247.0
pred8x8_mad_cow_dc_l0t_10_c:   74.759.2
pred8x8_mad_cow_dc_l0t_10_neon:50.544.7
pred8x8_mad_cow_dc_l00_10_c:   58.045.7
pred8x8_mad_cow_dc_l00_10_neon:42.537.5
pred8x8_plane_10_c:   347.7   295.5
pred8x8_plane_10_neon:136.2   108.0
pred8x8_top_dc_10_c:   44.538.5
pred8x8_top_dc_10_neon:39.734.5
pred8x8_vertical_10_c: 27.521.7
pred8x8_vertical_10_neon:  21.022.2
pred16x16_plane_10_c:1242.0  1075.7
pred16x16_plane_10_neon:  324.0   199.5

Signed-off-by: Mikhail Nitenko 
---

moved to 32-bit, however, in plane the 16bit are not enough, and it
overflows, so when it overflows the code starts using 32bit wide
sections

libavcodec/aarch64/h264pred_init.c |  40 +++-
libavcodec/aarch64/h264pred_neon.S | 302 -
2 files changed, 335 insertions(+), 7 deletions(-)

diff --git a/libavcodec/aarch64/h264pred_init.c 
b/libavcodec/aarch64/h264pred_init.c
index 325a86bfcd..0ae8f70d23 100644
--- a/libavcodec/aarch64/h264pred_init.c
+++ b/libavcodec/aarch64/h264pred_init.c
@@ -45,10 +45,23 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_l00_dc_neon(uint8_t *src, ptrdiff_t stride);
void ff_pred8x8_0l0_dc_neon(uint8_t *src, ptrdiff_t stride);

-void ff_pred16x16_top_dc_neon_10(uint8_t *src, ptrdiff_t stride);
-void ff_pred16x16_dc_neon_10(uint8_t *src, ptrdiff_t stride);
-void ff_pred16x16_hor_neon_10(uint8_t *src, ptrdiff_t stride);
void ff_pred16x16_vert_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_hor_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_plane_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred16x16_top_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+
+void ff_pred8x8_vert_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_hor_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_plane_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_128_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_left_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_top_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l0t_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0lt_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_l00_dc_neon_10(uint8_t *src, ptrdiff_t stride);
+void ff_pred8x8_0l0_dc_neon_10(uint8_t *src, ptrdiff_t stride);

static av_cold void h264_pred_init_neon(H264PredContext *h, int codec_id,
const int bit_depth,
@@ -84,10 +97,31 @@ static av_cold void h264_pred_init_neon(H264PredContext *h, 
int codec_id,
h->pred16x16[PLANE_PRED8x8  ] = ff_pred16x16_plane_neon;
}
if (bit_depth == 10) {
+if (chroma_format_idc <= 1) {
+h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vert_neon_10;
+h->pred8x8[HOR_PRED8x8  ] = ff_pred8x8_hor_neon_10;
+if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8)
+h->pred8x8[PLANE_PRED8x8] = ff_pred8x8_plane_neon_10;
+h->pred8x8[DC_128_PRED8x8   ] = ff_pred8x8_128_dc_neon_10;
+if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 &&
+codec_id != AV_CODEC_ID_VP8) {
+h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_neon_10;
+h->pred8x8[LEFT_DC_PRED8x8] = ff_pred8x8_left_dc_neon_10;
+h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_neon_10;
+h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8] = 
ff_pred8x8_l0t_dc_neon_10;
+h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8] = 
ff_pred8x8_0lt_dc_neon_10;
+h->pred8x8[ALZHEIMER_DC_L00_PRED8x8] = 
ff_pred8x8_l00_dc_neon_10;
+h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8] = 
ff_pred8x8_0l0_dc_neon_10;
+}
+}
+
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_neon_10;
h->pred16x16[VERT_PRED8x8   ] = ff_pred16x16_vert_neon_10;
h->pred16x16[HOR_PRED8x8] = ff_pred16x16_hor_neon_10;
h->pred16x16[TOP_DC_PRED8x8 ] = ff_pred16x16_top_dc_neon_10;
+if (codec_id != AV_CODEC_ID_SVQ3 && codec_id != 

Re: [FFmpeg-devel] [PATCH v2 2/2] lavc/aarch64: h264, add chroma loop filters for 10bit

2021-08-17 Thread Martin Storsjö

On Mon, 16 Aug 2021, Mikhail Nitenko wrote:


Benchmarks:                                              A53     A72
h264_h_loop_filter_chroma422_10bpp_c:                  277.5   114.2
h264_h_loop_filter_chroma422_10bpp_neon:               109.7    81.7
h264_h_loop_filter_chroma_10bpp_c:                     165.0    75.5
h264_h_loop_filter_chroma_10bpp_neon:                  121.2    74.7
h264_h_loop_filter_chroma_intra422_10bpp_c:            324.2   124.2
h264_h_loop_filter_chroma_intra422_10bpp_neon:         155.2    99.5
h264_h_loop_filter_chroma_intra_10bpp_c:               121.0    48.5
h264_h_loop_filter_chroma_intra_10bpp_neon:             79.5    52.7
h264_h_loop_filter_chroma_mbaff422_10bpp_c:            191.0    73.5
h264_h_loop_filter_chroma_mbaff422_10bpp_neon:         121.2    75.5
h264_h_loop_filter_chroma_mbaff_intra422_10bpp_c:      117.0    51.5
h264_h_loop_filter_chroma_mbaff_intra422_10bpp_neon:    79.5    53.7
h264_h_loop_filter_chroma_mbaff_intra_10bpp_c:          63.0    28.5
h264_h_loop_filter_chroma_mbaff_intra_10bpp_neon:       48.7    33.2
h264_v_loop_filter_chroma_10bpp_c:                     260.2   135.5
h264_v_loop_filter_chroma_10bpp_neon:                   72.2    49.2
h264_v_loop_filter_chroma_intra_10bpp_c:               158.0    70.7
h264_v_loop_filter_chroma_intra_10bpp_neon:             48.7    32.0

Signed-off-by: Mikhail Nitenko 
---

removed leftover code, moved from 32bit and started loading with two
alternating registers, code became quite a bit faster!

libavcodec/aarch64/h264dsp_init_aarch64.c |  37 
libavcodec/aarch64/h264dsp_neon.S | 255 ++
2 files changed, 292 insertions(+)




diff --git a/libavcodec/aarch64/h264dsp_neon.S 
b/libavcodec/aarch64/h264dsp_neon.S
index 997082498f..80b7ed5ce1 100644
--- a/libavcodec/aarch64/h264dsp_neon.S
+++ b/libavcodec/aarch64/h264dsp_neon.S
@@ -819,3 +819,258 @@ endfunc
weight_func 16
weight_func 8
weight_func 4
+
+.macro  h264_loop_filter_start_10
+cmp w2,  #0
+ldr w6,  [x4]
+ccmpw3,  #0, #0, ne
+lsl w2,  w2, #2
+mov v24.S[0], w6
+lsl w3,  w3, #2
+and w8,  w6,  w6,  lsl #16


Nitpick: Align the third operand column on ccmp/lsl above with how it's 
done here for the 'and'. (Yes the existing code here seems to have the 
same misalignment.)



+b.eq1f
+andsw8,  w8,  w8,  lsl #8
+b.ge2f
+1:
+ret
+2:
+.endm
+
+.macro h264_loop_filter_start_intra_10
+orr w4,  w2,  w3
+cbnzw4,  1f
+ret
+1:
+lsl w2, w2, #2
+lsl w3, w3, #2
+dup v30.8h, w2  // alpha
+dup v31.8h, w3  // beta
+.endm
+
+.macro  h264_loop_filter_chroma_10
+dup v22.8h, w2  // alpha
+dup v23.8h, w3  // beta
+uxtlv24.8h, v24.8b  // tc0
+
+uabdv26.8h, v16.8h, v0.8h   // abs(p0 - q0)
+uabdv28.8h, v18.8h, v16.8h  // abs(p1 - p0)
+uabdv30.8h, v2.8h,  v0.8h   // abs(q1 - q0)
+cmhiv26.8h, v22.8h, v26.8h  // < alpha
+cmhiv28.8h, v23.8h, v28.8h  // < beta
+cmhiv30.8h, v23.8h, v30.8h  // < beta
+
+and v26.16b, v26.16b, v28.16b
+mov v4.16b, v0.16b
+sub v4.8h,  v4.8h,  v16.8h
+and v26.16b, v26.16b, v30.16b
+shl v4.8h,  v4.8h,  #2
+mov x8, v26.d[0]
+mov x9, v26.d[1]
+sli v24.8H, v24.8H, #8
+uxtlv24.8H, v24.8B
+add v4.8h,  v4.8h,  v18.8h
+shl v24.8h, v24.8h,  #2
+
+addsx8,  x8,  x9


I think it would be better for in-order cores to do this 'adds' maybe a 
couple instructions earlier (but the 'mov' from SIMD to GPR probably takes 
a couple cycles, so not too far earlier), maybe one instruction earlier?



+b.eq9f
+
+moviv31.8h, #3  // (tc0 - 1) << (BIT_DEPTH - 
8)) + 1


I guess this 'movi' could be done before the 'b.eq' too? If we branch out, 
we'd have run it in vain, but it's probably essentially free in that case 
anyway, and avoids having the next 'uqsub' stalling, waiting for it.



+uqsub   v24.8h, v24.8h,  v31.8h
+sub v4.8h , v4.8h,  v2.8h
+srshr   v4.8h,  v4.8h,  #3
+sminv4.8h,  v4.8h,  v24.8h
+neg v25.8h, v24.8h
+smaxv4.8h,  v4.8h,  v25.8h
+and v4.16B, v4.16B, v26.16B
+add v16.8h,  v16.8h,  v4.8h
+sub

Re: [FFmpeg-devel] [PATCH 2/2] avcodec/h264_parser: fix nalsize parser

2021-08-17 Thread Michael Niedermayer
On Mon, Aug 16, 2021 at 11:31:15PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > Fixes: left shift of 16711968 by 8 places cannot be represented in type 
> > 'int'
> > Fixes: 
> > 36601/clusterfuzz-testcase-minimized-ffmpeg_AV_CODEC_ID_H264_fuzzer-6581933285965824
> > 
> > Found-by: continuous fuzzing process 
> > https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
> > Signed-off-by: Michael Niedermayer 
> > ---
> >  libavcodec/h264_parser.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
> > index d3c56cc188..22111c62a2 100644
> > --- a/libavcodec/h264_parser.c
> > +++ b/libavcodec/h264_parser.c
> > @@ -86,7 +86,7 @@ static int h264_find_frame_end(H264ParseContext *p, const 
> > uint8_t *buf,
> >  int nalsize = 0;
> >  i = next_avc;
> >  for (j = 0; j < p->nal_length_size; j++)
> > -nalsize = (nalsize << 8) | buf[i++];
> > +nalsize = ((unsigned)nalsize << 8) | buf[i++];
> >  if (nalsize <= 0 || nalsize > buf_size - i) {
> >  av_log(logctx, AV_LOG_ERROR, "AVC-parser: nal size %d 
> > remaining %d\n", nalsize, buf_size - i);
> >  return buf_size;
> > 
> Makes me wonder why I never applied this:
> https://patchwork.ffmpeg.org/project/ffmpeg/patch/20200529161755.9904-1-andreas.rheinha...@gmail.com/
> (Your fix would only fix the undefined behaviour, not the nonsense
> log message (with negative sizes) one gets in this scenario.)

if there was no reason why it wasn't applied, then please apply and backport your
fix

thx

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

He who knows, does not speak. He who speaks, does not know. -- Lao Tsu


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v3] avcodec/frame_thread_encoder: Free AVCodecContext structure on error during init

2021-08-17 Thread Michael Niedermayer
On Mon, Aug 16, 2021 at 11:28:41PM +0200, Andreas Rheinhardt wrote:
> Michael Niedermayer:
> > On Sun, Aug 15, 2021 at 07:35:35PM +0200, Andreas Rheinhardt wrote:
> >>
> >> PS: I still don't know whether my patch for av_opt_copy needs to bump
> >> minor or micro.
> > 
> > you mean the documentation changing patch ?
> > IMHO it depends on the viewpoint, I guess: is there a bug in the
> > documentation,
> > a bug in the implementation, or an API/ABI change?
> > 
> There was no bug per se; it is also no ABI change (it is only a
> documentation change after all). It is just that some previously
> undocumented behaviour got documented.

if you see it that way then a micro bump is enough

thx

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who are too smart to engage in politics are punished by being
governed by those who are dumber. -- Plato 


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH v2] doc/git-howto: be more strict about commit message formatting.

2021-08-17 Thread Nicolas George
Gyan Doshi (12021-08-16):
> The maximum, if specified, should be a single value.

Strictly speaking, yes. But I want to express that it is ok to choose a
lower maximum on a per-commit basis. I personally wrap to 64-66.

> Don't hold up on my account. I'll do it later when I survey that whole page
> for corrections and improvements.

Ok. Pushed.

Thanks.

Regards,

-- 
  Nicolas George


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [WIP] Event loop

2021-08-17 Thread Nicolas George
Xiang Xiao (12021-08-17):
> Nicolas, do you have any more progress? I am very interested in your
> proposal and want to test your change in our special device.

Sorry. I have been focusing on other projects while looking for a good
way to avoid dynamic allocations in the thread-aware scheduler.

Since I could not find one, I will try to start again on this soon.

Do you have an opinion on the low-level single-thread API?

Can you share some details about the needs of your special device? I would
consider them when writing the API.

Regards,

-- 
  Nicolas George


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 4/6] avfilter/avfilter: Allow to free non-static pads generically

2021-08-17 Thread Nicolas George
Andreas Rheinhardt (12021-08-17):
> This can be enabled/disabled on a per-filter basis by setting
> the new internal flags FF_FILTER_FLAG_FREE_(IN|OUT)PADS and
> on a per-pad basis by setting the AVFILTERPAD_FLAG_FREE_NAME flag.
> 
> Signed-off-by: Andreas Rheinhardt 
> ---
> I decided to combine both approaches: It has the advantage that
> the marginal extra code for a filter all of whose inputs'/outputs'
> names need to be freed is zero while making it easy to handle filters
> that have some inputs/outputs whose names need to be freed.

It has the drawback that the information about the nature of the pads
ends up at two places of the code that are not close to each other. It
is not good for maintenance and readability.

I think a boolean flag to ff_append_...pad() would do the job much more
elegantly. What do you think about it?

Regards,

-- 
  Nicolas George


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 2/6] avfilter/avfilter: Remove unused feature to add pads in the middle

2021-08-17 Thread Nicolas George
Andreas Rheinhardt (12021-08-17):
> Signed-off-by: Andreas Rheinhardt 
> ---
>  libavfilter/af_acrossover.c |  2 +-
>  libavfilter/af_afir.c   |  8 
>  libavfilter/af_aiir.c   |  4 ++--
>  libavfilter/af_amerge.c |  2 +-
>  libavfilter/af_amix.c   |  2 +-
>  libavfilter/af_anequalizer.c|  4 ++--
>  libavfilter/af_channelsplit.c   |  3 +--
>  libavfilter/af_headphone.c  |  4 ++--
>  libavfilter/af_join.c   |  2 +-
>  libavfilter/af_ladspa.c |  2 +-
>  libavfilter/af_lv2.c|  2 +-
>  libavfilter/avf_aphasemeter.c   |  4 ++--
>  libavfilter/avf_concat.c|  4 ++--
>  libavfilter/avfilter.c  | 15 ---
>  libavfilter/f_ebur128.c |  4 ++--
>  libavfilter/f_interleave.c  |  2 +-
>  libavfilter/f_segment.c |  2 +-
>  libavfilter/f_select.c  |  2 +-
>  libavfilter/f_streamselect.c|  4 ++--
>  libavfilter/internal.h  | 15 +--
>  libavfilter/split.c |  2 +-
>  libavfilter/src_movie.c |  2 +-
>  libavfilter/vf_bm3d.c   |  4 ++--
>  libavfilter/vf_decimate.c   |  4 ++--
>  libavfilter/vf_extractplanes.c  |  2 +-
>  libavfilter/vf_fieldmatch.c |  4 ++--
>  libavfilter/vf_guided.c |  4 ++--
>  libavfilter/vf_mergeplanes.c|  2 +-
>  libavfilter/vf_mix.c|  2 +-
>  libavfilter/vf_premultiply.c|  4 ++--
>  libavfilter/vf_program_opencl.c |  2 +-
>  libavfilter/vf_signature.c  |  2 +-
>  libavfilter/vf_stack.c  |  2 +-
>  libavfilter/vf_xmedian.c|  2 +-
>  34 files changed, 56 insertions(+), 69 deletions(-)

LGTM, but I think:

sed -i 's/ff_insert_pad/ff_append_pad/' *.[ch]
sed -i 's/ff_insert_inpad/ff_append_inpad/' *.[ch]
sed -i 's/ff_insert_outpad/ff_append_outpad/' *.[ch]

would be in order.

Regards,

-- 
  Nicolas George


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 1/6] avfilter/internal: Replace AVFilterPad.needs_writable by flags

2021-08-17 Thread Nicolas George
Andreas Rheinhardt (12021-08-17):
> It will be useful in the future when more flags are added.
> 
> Signed-off-by: Andreas Rheinhardt 
> ---
>  libavfilter/af_anequalizer.c  |  2 +-
>  libavfilter/af_channelmap.c   |  2 +-
>  libavfilter/af_firequalizer.c |  2 +-
>  libavfilter/avfilter.c|  2 +-
>  libavfilter/f_reverse.c   |  2 +-
>  libavfilter/internal.h| 21 +
>  libavfilter/vf_chromakey.c|  4 ++--
>  libavfilter/vf_codecview.c|  2 +-
>  libavfilter/vf_colorcontrast.c|  2 +-
>  libavfilter/vf_colorcorrect.c |  2 +-
>  libavfilter/vf_colorize.c |  2 +-
>  libavfilter/vf_colorkey.c |  2 +-
>  libavfilter/vf_colortemperature.c |  2 +-
>  libavfilter/vf_datascope.c|  2 +-
>  libavfilter/vf_despill.c  |  2 +-
>  libavfilter/vf_drawbox.c  |  4 ++--
>  libavfilter/vf_drawtext.c |  2 +-
>  libavfilter/vf_elbg.c |  2 +-
>  libavfilter/vf_exposure.c |  2 +-
>  libavfilter/vf_fade.c |  2 +-
>  libavfilter/vf_fillborders.c  |  2 +-
>  libavfilter/vf_lumakey.c  |  2 +-
>  libavfilter/vf_maskfun.c  |  2 +-
>  libavfilter/vf_monochrome.c   |  2 +-
>  libavfilter/vf_subtitles.c|  2 +-
>  libavfilter/vf_swaprect.c |  2 +-
>  libavfilter/vf_vibrance.c |  2 +-
>  27 files changed, 41 insertions(+), 36 deletions(-)

LGTM. I do not maintain most of these files, but it is straightforward
enough.

Regards,

-- 
  Nicolas George


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH 1/3] avfilter/avfilter: Remove redundant assignment

2021-08-17 Thread Nicolas George
Andreas Rheinhardt (12021-08-17):
> av_frame_copy_props() already copies pts.
> 
> Signed-off-by: Andreas Rheinhardt 
> ---
>  libavfilter/avfilter.c | 1 -
>  1 file changed, 1 deletion(-)

All three look ok. Thanks.

Regards,

-- 
  Nicolas George


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] libavformat/riffenc: support raw avi for raw PAL8 or Gray8 pixel data

2021-08-17 Thread Ray
Does that mean I should only deal with the gray8 format in particular? Also, it
has passed the FATE test.

> 2021年8月17日 上午3:00,Michael Niedermayer  写道:
> 
> On Mon, Aug 16, 2021 at 05:01:14PM +0800, rui.jiang wrote:
>> add palette data in avi header when the input data is raw PAL8 or Gray8 
>> pixel data and the output data is 8bit raw avi video;
>> 
>> Signed-off-by: rui.jiang <229135...@qq.com>
>> ---
>> libavformat/riffenc.c | 10 +-
>> 1 file changed, 9 insertions(+), 1 deletion(-)
>> 
>> diff --git a/libavformat/riffenc.c b/libavformat/riffenc.c
>> index 43c8bf957a..bc654b3cd3 100644
>> --- a/libavformat/riffenc.c
>> +++ b/libavformat/riffenc.c
>> @@ -228,7 +228,8 @@ void ff_put_bmp_header(AVIOContext *pb, 
>> AVCodecParameters *par,
>> pal_avi = !for_asf &&
>>   (pix_fmt == AV_PIX_FMT_PAL8 ||
>>pix_fmt == AV_PIX_FMT_MONOWHITE ||
>> -   pix_fmt == AV_PIX_FMT_MONOBLACK);
>> +   pix_fmt == AV_PIX_FMT_MONOBLACK ||
>> +   pix_fmt == AV_PIX_FMT_GRAY8);
>> 
>> /* Size (not including the size of the color table or color masks) */
>> avio_wl32(pb, 40 + (ignore_extradata || pal_avi ? 0 : extradata_size));
>> @@ -263,6 +264,13 @@ void ff_put_bmp_header(AVIOContext *pb, 
>> AVCodecParameters *par,
>> avio_wl32(pb, 0xff);
>> else if (i == 1 && pix_fmt == AV_PIX_FMT_MONOBLACK)
>> avio_wl32(pb, 0xff);
>> +else if (pix_fmt == AV_PIX_FMT_PAL8 || pix_fmt == 
>> AV_PIX_FMT_GRAY8) {
>> +/* Initialize palette */
>> +avio_w8(pb,i);
>> +avio_w8(pb,i);
>> +avio_w8(pb,i);
>> +avio_w8(pb,0);
> 
> I don't think this will always match the palette;
> also, this is going to break FATE, I would assume
> 
> 
> [...]
> 
> -- 
> Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
> 
> Why not whip the teacher when the pupil misbehaves? -- Diogenes of Sinope
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


Re: [FFmpeg-devel] [PATCH] configure: set IceLake-AVX512 as the minimum baseline

2021-08-17 Thread Hendrik Leppkes
On Tue, Aug 17, 2021 at 8:30 AM Wu Jianhua  wrote:
>
> Based on IceLake-AVX512 and newer architecture, a broad
> range of the subsets of AVX512 could be supported.
>
> Signed-off-by: Wu Jianhua 
> ---
>  configure | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/configure b/configure
> index 94b30afe74..04caa25736 100755
> --- a/configure
> +++ b/configure
> @@ -6057,7 +6057,9 @@ EOF
>  elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
>  esac
>
> -enabled avx512 && check_x86asm avx512_external "vmovdqa32 
> [eax]{k1}{z}, zmm0"
> +# Only IceLake and newer architectures could enable AVX512
> +# 
> F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
> +enabled avx512 && check_x86asm avx512_external "vpdpwssds 
> zmm31{k1}{z}, zmm29, zmm28"
>  enabled avx2   && check_x86asm avx2_external   "vextracti128 xmm0, 
> ymm0, 0"
>  enabled xop&& check_x86asm xop_external"vpmacsdd xmm0, xmm1, 
> xmm2, xmm3"
>  enabled fma4   && check_x86asm fma4_external   "vfmaddps ymm0, ymm1, 
> ymm2, ymm3"

Note that you are just checking the functionality of the assembler
here, not having a runtime impact.
What you would likely want is to update avutil/x86/cpu.c as well to
only enable the AVX512 flag on those CPUs.

- Hendrik
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] [PATCH] configure: set IceLake-AVX512 as the minimum baseline

2021-08-17 Thread Wu Jianhua
Based on IceLake-AVX512 and newer architecture, a broad
range of the subsets of AVX512 could be supported.

Signed-off-by: Wu Jianhua 
---
 configure | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/configure b/configure
index 94b30afe74..04caa25736 100755
--- a/configure
+++ b/configure
@@ -6057,7 +6057,9 @@ EOF
 elf*) enabled debug && append X86ASMFLAGS $x86asm_debug ;;
 esac
 
-enabled avx512 && check_x86asm avx512_external "vmovdqa32 
[eax]{k1}{z}, zmm0"
+# Only IceLake and newer architectures could enable AVX512
+# 
F/CD/BW/DQ/VL/VNNI/IFMA/VBMI/VBMI2/VPOPCNTDQ/BITALG/GFNI/VAES/VPCLMULQDQ
+enabled avx512 && check_x86asm avx512_external "vpdpwssds 
zmm31{k1}{z}, zmm29, zmm28"
 enabled avx2   && check_x86asm avx2_external   "vextracti128 xmm0, 
ymm0, 0"
 enabled xop&& check_x86asm xop_external"vpmacsdd xmm0, xmm1, 
xmm2, xmm3"
 enabled fma4   && check_x86asm fma4_external   "vfmaddps ymm0, ymm1, 
ymm2, ymm3"
-- 
2.25.1

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".


[FFmpeg-devel] 回复: [PATCH 2/2] avfilter/dnn_processing: Add TensorRT backend

2021-08-17 Thread Xiaowei Wang


>No, dlopen() is not allowed for this kind of thing. Linking must be added at 
>build time.

>You for that matter apparently add support for build time linking in patch 1, 
>then attempt to remove it in this one, leaving cruft in the configure script. 
>Why?

Sorry for the late reply, outlook automatically put the mail in the junk box. 
As I replied earlier, TensorRT only provides C++ API, which means the filter 
will be implemented in C++, however, I was told that submitting C++ code is not 
a good idea, so I came up with this dlopen() idea, in this way, I can wrap C++ 
code into C interfaces and only submit C code.

If dlopen() is not allowed and submitting C++ code is fine, I will reorg the 
code and get back to what I did in patch 1. Is this OK?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".