[FFmpeg-devel] [FFmpeg-devel V2] avformat/rtpenc_mpegts: check avformat_new_stream() return value

2017-11-26 Thread Pan Bian
The function avformat_new_stream() returns a NULL pointer on failure.
However, in function rtp_mpegts_write_header(), its return value is not
validated before it is dereferenced. Check the return value against NULL
to avoid potential NULL dereference.

Signed-off-by: Pan Bian 
---
V2: fix patcheck warnings
---
 libavformat/rtpenc_mpegts.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/libavformat/rtpenc_mpegts.c b/libavformat/rtpenc_mpegts.c
index 7af02e0..5f81e1a 100644
--- a/libavformat/rtpenc_mpegts.c
+++ b/libavformat/rtpenc_mpegts.c
@@ -85,6 +85,10 @@ static int rtp_mpegts_write_header(AVFormatContext *s)
 }
 rtp_ctx->oformat = rtp_format;
 st = avformat_new_stream(rtp_ctx, NULL);
+if (!st) {
+ret = AVERROR(ENOMEM);
+goto fail;
+}
 st->time_base.num   = 1;
 st->time_base.den   = 90000;
 st->codecpar->codec_id = AV_CODEC_ID_MPEG2TS;
-- 
1.9.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [FFmpeg-devel 1/1] avformat/rtpenc_mpegts: check avformat_new_stream() return value

2017-11-26 Thread Pan Bian
The function avformat_new_stream() returns a NULL pointer on failure.
However, in function rtp_mpegts_write_header(), its return value is not
validated before it is dereferenced. Check the return value against NULL
to avoid potential NULL dereference.

Signed-off-by: Pan Bian 
---
V2: fix patcheck warnings
---
 libavformat/rtpenc_mpegts.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/libavformat/rtpenc_mpegts.c b/libavformat/rtpenc_mpegts.c
index 7af02e0..5f81e1a 100644
--- a/libavformat/rtpenc_mpegts.c
+++ b/libavformat/rtpenc_mpegts.c
@@ -85,6 +85,10 @@ static int rtp_mpegts_write_header(AVFormatContext *s)
 }
 rtp_ctx->oformat = rtp_format;
 st = avformat_new_stream(rtp_ctx, NULL);
+if (!st) {
+ret = AVERROR(ENOMEM);
+goto fail;
+}
 st->time_base.num   = 1;
 st->time_base.den   = 90000;
 st->codecpar->codec_id = AV_CODEC_ID_MPEG2TS;
-- 
1.9.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [FFmpeg-devel 1/1] avcodec/samidec: check av_strdup() return value

2017-11-26 Thread Pan Bian
In function sami_paragraph_to_ass(), the return value of av_strdup() is
not checked. To avoid potential NULL dereference, the return value
should be checked against NULL.

Signed-off-by: Pan Bian 
---
V2: fix patcheck warnings
---
 libavcodec/samidec.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/samidec.c b/libavcodec/samidec.c
index 2620424..6a59806 100644
--- a/libavcodec/samidec.c
+++ b/libavcodec/samidec.c
@@ -48,6 +48,9 @@ static int sami_paragraph_to_ass(AVCodecContext *avctx, const 
char *src)
 AVBPrint *dst_content = &sami->encoded_content;
 AVBPrint *dst_source = &sami->encoded_source;
 
+if (!dupsrc)
+return AVERROR(ENOMEM);
+
 av_bprint_clear(&sami->encoded_content);
 av_bprint_clear(&sami->content);
 av_bprint_clear(&sami->encoded_source);
-- 
1.9.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/4] avformat/mxfenc: use track count to generate component instance uuid

2017-11-26 Thread Mark Reid
---
 libavformat/mxf.h   |  1 -
 libavformat/mxfenc.c| 45 +
 tests/ref/fate/copy-trac4914|  2 +-
 tests/ref/fate/time_base|  2 +-
 tests/ref/lavf/mxf  |  6 +++---
 tests/ref/lavf/mxf_d10  |  2 +-
 tests/ref/lavf/mxf_dv25 |  2 +-
 tests/ref/lavf/mxf_dvcpro50 |  2 +-
 tests/ref/lavf/mxf_opatom   |  2 +-
 tests/ref/lavf/mxf_opatom_audio |  2 +-
 10 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/libavformat/mxf.h b/libavformat/mxf.h
index f3db1f939b..2d5b44943b 100644
--- a/libavformat/mxf.h
+++ b/libavformat/mxf.h
@@ -45,7 +45,6 @@ enum MXFMetadataSetType {
 SubDescriptor,
 IndexTableSegment,
 EssenceContainerData,
-TypeBottom,// add metadata type before this
 EssenceGroup,
 TaggedValue,
 };
diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index ed6ecbf541..d573586fe4 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -105,6 +105,7 @@ typedef struct MXFPackage {
 char *name;
 enum MXFMetadataSetType type;
 int instance;
+int uuid_offset;
 } MXFPackage;
 
 enum ULIndex {
@@ -846,6 +847,10 @@ static void mxf_write_track(AVFormatContext *s, AVStream 
*st, MXFPackage *packag
 MXFContext *mxf = s->priv_data;
 AVIOContext *pb = s->pb;
 MXFStreamContext *sc = st->priv_data;
+int instance = package->uuid_offset;
+
+if (st != mxf->timecode_track)
+instance += st->index + 1;
 
 mxf_write_metadata_key(pb, 0x013b00);
 PRINT_KEY(s, "track key", pb->buf_ptr - 16);
@@ -853,7 +858,7 @@ static void mxf_write_track(AVFormatContext *s, AVStream 
*st, MXFPackage *packag
 
 // write track uid
 mxf_write_local_tag(pb, 16, 0x3C0A);
-mxf_write_uuid(pb, package->type == MaterialPackage ? Track : Track + 
TypeBottom, st->index);
+mxf_write_uuid(pb, Track, instance);
 PRINT_KEY(s, "track uid", pb->buf_ptr - 16);
 
 // write track id
@@ -884,7 +889,7 @@ static void mxf_write_track(AVFormatContext *s, AVStream 
*st, MXFPackage *packag
 
 // write sequence refs
 mxf_write_local_tag(pb, 16, 0x4803);
-mxf_write_uuid(pb, package->type == MaterialPackage ? Sequence: Sequence + 
TypeBottom, st->index);
+mxf_write_uuid(pb, Sequence, instance);
 }
 
 static const uint8_t smpte_12m_timecode_track_data_ul[] = { 
0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x01,0x03,0x02,0x01,0x01,0x00,0x00,0x00 
};
@@ -918,13 +923,17 @@ static void mxf_write_sequence(AVFormatContext *s, 
AVStream *st, MXFPackage *pac
 MXFContext *mxf = s->priv_data;
 AVIOContext *pb = s->pb;
 enum MXFMetadataSetType component;
+int instance = package->uuid_offset;
+
+if (st != mxf->timecode_track)
+instance += st->index + 1;
 
 mxf_write_metadata_key(pb, 0x010f00);
 PRINT_KEY(s, "sequence key", pb->buf_ptr - 16);
 klv_encode_ber_length(pb, 80);
 
 mxf_write_local_tag(pb, 16, 0x3C0A);
-mxf_write_uuid(pb, package->type == MaterialPackage ? Sequence: Sequence + 
TypeBottom, st->index);
+mxf_write_uuid(pb, Sequence, instance);
 
 PRINT_KEY(s, "sequence uid", pb->buf_ptr - 16);
 mxf_write_common_fields(s, st);
@@ -936,9 +945,8 @@ static void mxf_write_sequence(AVFormatContext *s, AVStream 
*st, MXFPackage *pac
 component = TimecodeComponent;
 else
 component = SourceClip;
-if (package->type == SourcePackage)
-component += TypeBottom;
-mxf_write_uuid(pb, component, st->index);
+
+mxf_write_uuid(pb, component, instance);
 }
 
 static void mxf_write_timecode_component(AVFormatContext *s, AVStream *st, 
MXFPackage *package)
@@ -951,8 +959,7 @@ static void mxf_write_timecode_component(AVFormatContext 
*s, AVStream *st, MXFPa
 
 // UID
 mxf_write_local_tag(pb, 16, 0x3C0A);
-mxf_write_uuid(pb, package->type == MaterialPackage ? TimecodeComponent :
-   TimecodeComponent + TypeBottom, st->index);
+mxf_write_uuid(pb, TimecodeComponent, package->uuid_offset);
 
 mxf_write_common_fields(s, st);
 
@@ -973,6 +980,7 @@ static void mxf_write_structural_component(AVFormatContext 
*s, AVStream *st, MXF
 {
 AVIOContext *pb = s->pb;
 int i;
+int instance = package->uuid_offset + st->index + 1;
 
 mxf_write_metadata_key(pb, 0x011100);
 PRINT_KEY(s, "sturctural component key", pb->buf_ptr - 16);
@@ -980,7 +988,7 @@ static void mxf_write_structural_component(AVFormatContext 
*s, AVStream *st, MXF
 
 // write uid
 mxf_write_local_tag(pb, 16, 0x3C0A);
-mxf_write_uuid(pb, package->type == MaterialPackage ? SourceClip: 
SourceClip + TypeBottom, st->index);
+mxf_write_uuid(pb, SourceClip, instance);
 
 PRINT_KEY(s, "structural component uid", pb->buf_ptr - 16);
 mxf_write_common_fields(s, st);
@@ -1329,7 +1337,7 @@ static int mxf_write_user_comments(AVFormatContext *s, 
const AVDictionary *m)
 return count;
 }
 
-static void mxf_write_package(AVFormatContext *s, MXFPackage 

[FFmpeg-devel] [PATCH 3/4] avformat/mxfenc: write reel_name if metadata key is present

2017-11-26 Thread Mark Reid
---
 libavformat/mxf.h|  1 +
 libavformat/mxfenc.c | 52 ++--
 2 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/libavformat/mxf.h b/libavformat/mxf.h
index 2d5b44943b..ffcc429a8b 100644
--- a/libavformat/mxf.h
+++ b/libavformat/mxf.h
@@ -47,6 +47,7 @@ enum MXFMetadataSetType {
 EssenceContainerData,
 EssenceGroup,
 TaggedValue,
+TapeDescriptor,
 };
 
 enum MXFFrameLayout {
diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index d573586fe4..8280f0236e 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -106,6 +106,7 @@ typedef struct MXFPackage {
 enum MXFMetadataSetType type;
 int instance;
 int uuid_offset;
+struct MXFPackage *ref;
 } MXFPackage;
 
 enum ULIndex {
@@ -999,20 +1000,33 @@ static void 
mxf_write_structural_component(AVFormatContext *s, AVStream *st, MXF
 
 // write source package uid, end of the reference
 mxf_write_local_tag(pb, 32, 0x1101);
-if (package->type == SourcePackage) {
+if (!package->ref) {
 for (i = 0; i < 4; i++)
 avio_wb64(pb, 0);
 } else
-mxf_write_umid(s, 1);
+mxf_write_umid(s, package->ref->instance);
 
 // write source track id
 mxf_write_local_tag(pb, 4, 0x1102);
-if (package->type == SourcePackage)
+if (package->type == SourcePackage && !package->ref)
 avio_wb32(pb, 0);
 else
 avio_wb32(pb, st->index+2);
 }
 
+static void mxf_write_tape_descriptor(AVFormatContext *s)
+{
+AVIOContext *pb = s->pb;
+
+mxf_write_metadata_key(pb, 0x012e00);
+PRINT_KEY(s, "tape descriptor key", pb->buf_ptr - 16);
+klv_encode_ber_length(pb, 20);
+mxf_write_local_tag(pb, 16, 0x3C0A);
+mxf_write_uuid(pb, TapeDescriptor, 0);
+PRINT_KEY(s, "tape_desc uid", pb->buf_ptr - 16);
+}
+
+
 static void mxf_write_multi_descriptor(AVFormatContext *s)
 {
 MXFContext *mxf = s->priv_data;
@@ -1396,13 +1410,17 @@ static int mxf_write_package(AVFormatContext *s, 
MXFPackage *package)
 }
 
 // write multiple descriptor reference
-if (package->type == SourcePackage) {
+if (package->instance == 1) {
 mxf_write_local_tag(pb, 16, 0x4701);
 if (s->nb_streams > 1) {
 mxf_write_uuid(pb, MultipleDescriptor, 0);
 mxf_write_multi_descriptor(s);
 } else
 mxf_write_uuid(pb, SubDescriptor, 0);
+} else if (package->instance == 2) {
+mxf_write_local_tag(pb, 16, 0x4701);
+mxf_write_uuid(pb, TapeDescriptor, 0);
+mxf_write_tape_descriptor(s);
 }
 
 // write timecode track
@@ -1416,7 +1434,7 @@ static int mxf_write_package(AVFormatContext *s, 
MXFPackage *package)
 mxf_write_sequence(s, st, package);
 mxf_write_structural_component(s, st, package);
 
-if (package->type == SourcePackage) {
+if (package->instance == 1) {
 MXFStreamContext *sc = st->priv_data;
 mxf_essence_container_uls[sc->index].write_desc(s, st);
 }
@@ -1452,11 +1470,13 @@ static int 
mxf_write_header_metadata_sets(AVFormatContext *s)
 AVStream *st = NULL;
 int i;
 int track_count = 0;
-MXFPackage packages[2] = {};
+MXFPackage packages[3] = {};
 int package_count = 2;
 packages[0].type = MaterialPackage;
 packages[1].type = SourcePackage;
 packages[1].instance = 1;
+packages[0].ref = &packages[1];
+
 
 if (entry = av_dict_get(s->metadata, "material_package_name", NULL, 0))
packages[0].name = entry->value;
@@ -1474,6 +1494,26 @@ static int 
mxf_write_header_metadata_sets(AVFormatContext *s)
 }
 }
 
+if (entry = av_dict_get(s->metadata, "reel_name", NULL, 0)) {
+packages[2].name = entry->value;
+} else {
+/* check if any of the streams contain a reel_name */
+for (i = 0; i < s->nb_streams; i++) {
+st = s->streams[i];
+if (entry = av_dict_get(st->metadata, "reel_name", NULL, 0)) {
+packages[2].name = entry->value;
+break;
+}
+}
+}
+
+if (packages[2].name) {
+packages[2].type = SourcePackage;
+packages[2].instance = 2;
+packages[1].ref = &packages[2];
+package_count = 3;
+}
+
 mxf_write_preface(s);
 mxf_write_identification(s);
 mxf_write_content_storage(s, packages, package_count);
-- 
2.13.6 (Apple Git-96)

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/4] avformat/mxfenc: pass MXFPackage around instead of type

2017-11-26 Thread Mark Reid
---
 libavformat/mxfenc.c | 99 +---
 1 file changed, 55 insertions(+), 44 deletions(-)

diff --git a/libavformat/mxfenc.c b/libavformat/mxfenc.c
index 035e65ed43..ed6ecbf541 100644
--- a/libavformat/mxfenc.c
+++ b/libavformat/mxfenc.c
@@ -101,6 +101,12 @@ typedef struct MXFContainerEssenceEntry {
 void (*write_desc)(AVFormatContext *, AVStream *);
 } MXFContainerEssenceEntry;
 
+typedef struct MXFPackage {
+char *name;
+enum MXFMetadataSetType type;
+int instance;
+} MXFPackage;
+
 enum ULIndex {
 INDEX_MPEG2 = 0,
 INDEX_AES3,
@@ -808,13 +814,14 @@ static void mxf_write_identification(AVFormatContext *s)
 avio_wb64(pb, mxf->timestamp);
 }
 
-static void mxf_write_content_storage(AVFormatContext *s)
+static void mxf_write_content_storage(AVFormatContext *s, MXFPackage 
*packages, int package_count)
 {
 AVIOContext *pb = s->pb;
+int i;
 
 mxf_write_metadata_key(pb, 0x011800);
 PRINT_KEY(s, "content storage key", pb->buf_ptr - 16);
-klv_encode_ber_length(pb, 92);
+klv_encode_ber_length(pb, 60 + (16 * package_count));
 
 // write uid
 mxf_write_local_tag(pb, 16, 0x3C0A);
@@ -822,10 +829,11 @@ static void mxf_write_content_storage(AVFormatContext *s)
 PRINT_KEY(s, "content storage uid", pb->buf_ptr - 16);
 
 // write package reference
-mxf_write_local_tag(pb, 16 * 2 + 8, 0x1901);
-mxf_write_refs_count(pb, 2);
-mxf_write_uuid(pb, MaterialPackage, 0);
-mxf_write_uuid(pb, SourcePackage, 0);
+mxf_write_local_tag(pb, 16 * package_count + 8, 0x1901);
+mxf_write_refs_count(pb, package_count);
+for (i = 0; i < package_count; i++) {
+mxf_write_uuid(pb, packages[i].type, packages[i].instance);
+}
 
 // write essence container data
 mxf_write_local_tag(pb, 8 + 16, 0x1902);
@@ -833,7 +841,7 @@ static void mxf_write_content_storage(AVFormatContext *s)
 mxf_write_uuid(pb, EssenceContainerData, 0);
 }
 
-static void mxf_write_track(AVFormatContext *s, AVStream *st, enum 
MXFMetadataSetType type)
+static void mxf_write_track(AVFormatContext *s, AVStream *st, MXFPackage 
*package)
 {
 MXFContext *mxf = s->priv_data;
 AVIOContext *pb = s->pb;
@@ -845,7 +853,7 @@ static void mxf_write_track(AVFormatContext *s, AVStream 
*st, enum MXFMetadataSe
 
 // write track uid
 mxf_write_local_tag(pb, 16, 0x3C0A);
-mxf_write_uuid(pb, type == MaterialPackage ? Track : Track + TypeBottom, 
st->index);
+mxf_write_uuid(pb, package->type == MaterialPackage ? Track : Track + 
TypeBottom, st->index);
 PRINT_KEY(s, "track uid", pb->buf_ptr - 16);
 
 // write track id
@@ -854,7 +862,7 @@ static void mxf_write_track(AVFormatContext *s, AVStream 
*st, enum MXFMetadataSe
 
 // write track number
 mxf_write_local_tag(pb, 4, 0x4804);
-if (type == MaterialPackage)
+if (package->type == MaterialPackage)
 avio_wb32(pb, 0); // track number of material package is 0
 else
 avio_write(pb, sc->track_essence_element_key + 12, 4);
@@ -876,7 +884,7 @@ static void mxf_write_track(AVFormatContext *s, AVStream 
*st, enum MXFMetadataSe
 
 // write sequence refs
 mxf_write_local_tag(pb, 16, 0x4803);
-mxf_write_uuid(pb, type == MaterialPackage ? Sequence: Sequence + 
TypeBottom, st->index);
+mxf_write_uuid(pb, package->type == MaterialPackage ? Sequence: Sequence + 
TypeBottom, st->index);
 }
 
 static const uint8_t smpte_12m_timecode_track_data_ul[] = { 
0x06,0x0E,0x2B,0x34,0x04,0x01,0x01,0x01,0x01,0x03,0x02,0x01,0x01,0x00,0x00,0x00 
};
@@ -905,7 +913,7 @@ static void mxf_write_common_fields(AVFormatContext *s, 
AVStream *st)
 }
 }
 
-static void mxf_write_sequence(AVFormatContext *s, AVStream *st, enum 
MXFMetadataSetType type)
+static void mxf_write_sequence(AVFormatContext *s, AVStream *st, MXFPackage 
*package)
 {
 MXFContext *mxf = s->priv_data;
 AVIOContext *pb = s->pb;
@@ -916,7 +924,7 @@ static void mxf_write_sequence(AVFormatContext *s, AVStream 
*st, enum MXFMetadat
 klv_encode_ber_length(pb, 80);
 
 mxf_write_local_tag(pb, 16, 0x3C0A);
-mxf_write_uuid(pb, type == MaterialPackage ? Sequence: Sequence + 
TypeBottom, st->index);
+mxf_write_uuid(pb, package->type == MaterialPackage ? Sequence: Sequence + 
TypeBottom, st->index);
 
 PRINT_KEY(s, "sequence uid", pb->buf_ptr - 16);
 mxf_write_common_fields(s, st);
@@ -928,12 +936,12 @@ static void mxf_write_sequence(AVFormatContext *s, 
AVStream *st, enum MXFMetadat
 component = TimecodeComponent;
 else
 component = SourceClip;
-if (type == SourcePackage)
+if (package->type == SourcePackage)
 component += TypeBottom;
 mxf_write_uuid(pb, component, st->index);
 }
 
-static void mxf_write_timecode_component(AVFormatContext *s, AVStream *st, 
enum MXFMetadataSetType type)
+static void mxf_write_timecode_component(AVFormatContext *s, AVStream *st, 
MXFPackage *package)
 {
 MXFContext *mxf = 

[FFmpeg-devel] [PATCH 4/4] fate/mxf: add reel name test

2017-11-26 Thread Mark Reid
---
 tests/fate/mxf.mak   | 8 ++--
 tests/ref/fate/mxf-reel_name | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)
 create mode 100644 tests/ref/fate/mxf-reel_name

diff --git a/tests/fate/mxf.mak b/tests/fate/mxf.mak
index 7714b61569..dce23d522e 100644
--- a/tests/fate/mxf.mak
+++ b/tests/fate/mxf.mak
@@ -33,9 +33,13 @@ FATE_MXF_PROBE-$(call ENCDEC2, DVVIDEO, PCM_S16LE, MXF) += 
fate-mxf-probe-dv25
 fate-mxf-probe-dv25: SRC = $(TARGET_SAMPLES)/mxf/Avid-5.mxf
 fate-mxf-probe-dv25: CMD = run $(PROBE_FORMAT_STREAMS_COMMAND) -i "$(SRC)"
 
+FATE_MXF_REEL_NAME-$(call ENCDEC2, MPEG2VIDEO, PCM_S16LE, MXF) += 
fate-mxf-reel_name
+fate-mxf-reel_name: $(TARGET_SAMPLES)/mxf/Sony-1.mxf
+fate-mxf-reel_name: CMD = md5 -y -i $(TARGET_SAMPLES)/mxf/Sony-1.mxf  -c 
copy -timecode 00:00:00:00 -metadata "reel_name=test_reel" -fflags +bitexact -f 
mxf
+
 FATE_MXF-$(CONFIG_MXF_DEMUXER) += $(FATE_MXF)
 
-FATE_SAMPLES_AVCONV += $(FATE_MXF-yes)
+FATE_SAMPLES_AVCONV += $(FATE_MXF-yes) $(FATE_MXF_REEL_NAME-yes)
 FATE_SAMPLES_FFPROBE += $(FATE_MXF_PROBE-yes)
 
-fate-mxf: $(FATE_MXF-yes) $(FATE_MXF_PROBE-yes)
+fate-mxf: $(FATE_MXF-yes) $(FATE_MXF_PROBE-yes) $(FATE_MXF_REEL_NAME-yes)
diff --git a/tests/ref/fate/mxf-reel_name b/tests/ref/fate/mxf-reel_name
new file mode 100644
index 00..fb9586097a
--- /dev/null
+++ b/tests/ref/fate/mxf-reel_name
@@ -0,0 +1 @@
+dda6c54b642b8794a87d809fdb361f95
-- 
2.13.6 (Apple Git-96)

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 0/4] avformat/mxfenc add reel_name write support

2017-11-26 Thread Mark Reid
Hi,
The following patch series adds support for writing reel names to mxf files.
The extra metadata is only written if the metadata key "reel_name" is present.
The mxf fate tests needed to be updated because I slightly changed the way 
internal uuids for tracks and component metadata are calculated.

Mark Reid (4):
  avformat/mxfenc: pass MXFPackage around instead of type
  avformat/mxfenc: use track count to generate component instance uuid
  avformat/mxfenc: write reel_name if metadata key is present
  fate/mxf: add reel name test

 libavformat/mxf.h   |   2 +-
 libavformat/mxfenc.c| 160 
 tests/fate/mxf.mak  |   8 +-
 tests/ref/fate/copy-trac4914|   2 +-
 tests/ref/fate/mxf-reel_name|   1 +
 tests/ref/fate/time_base|   2 +-
 tests/ref/lavf/mxf  |   6 +-
 tests/ref/lavf/mxf_d10  |   2 +-
 tests/ref/lavf/mxf_dv25 |   2 +-
 tests/ref/lavf/mxf_dvcpro50 |   2 +-
 tests/ref/lavf/mxf_opatom   |   2 +-
 tests/ref/lavf/mxf_opatom_audio |   2 +-
 12 files changed, 129 insertions(+), 62 deletions(-)
 create mode 100644 tests/ref/fate/mxf-reel_name

-- 
2.13.6 (Apple Git-96)
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] lavc/utils: simplify lockmgr

2017-11-26 Thread Rostislav Pehlivanov
Again, totally unneeded use of the atomic function to set/NULL a local variable.

Signed-off-by: Rostislav Pehlivanov 
---
 libavcodec/utils.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index de2dcea54d..17388ef637 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -26,7 +26,6 @@
  */
 
 #include "config.h"
-#include "libavutil/atomic.h"
 #include "libavutil/attributes.h"
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
@@ -85,9 +84,11 @@ static int default_lockmgr_cb(void **arg, enum AVLockOp op)
 av_free(tmp);
 return AVERROR(err);
 }
-if (avpriv_atomic_ptr_cas(mutex, NULL, tmp)) {
+if (*mutex) {
 pthread_mutex_destroy(tmp);
 av_free(tmp);
+} else {
+*mutex = tmp;
 }
 }
 
@@ -103,8 +104,7 @@ static int default_lockmgr_cb(void **arg, enum AVLockOp op)
 case AV_LOCK_DESTROY:
 if (*mutex)
 pthread_mutex_destroy(*mutex);
-av_free(*mutex);
-avpriv_atomic_ptr_cas(mutex, *mutex, NULL);
+av_freep(*mutex);
 return 0;
 }
 return 1;
-- 
2.15.0.417.g466bffb3ac

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 00/15] OpenCL infrastructure, filters

2017-11-26 Thread hydra3333
OK and thank you, these do work fine and produce handy results and the 
documentation you linked works.


".\ffmpeg_3.latest_master.exe" -v verbose -init_hw_device list
".\ffmpeg_3.latest_master.exe" -v verbose -init_hw_device opencl

I'll head over to ffmpeg-user (and do some reading) to find out what
concepts/knowledge I'm missing in regard to specifying parameters for
hardware acceleration, and why the commands below failed with:
  "Impossible to convert between the formats supported by the filter 
'Parsed_yadif_0' and the filter 'auto_scaler_0'"

  "Failed to inject frame into filter network: Function not implemented"

P.S. really nice work !!

".\ffmpeg_3.latest_master.exe" -hide_banner -v verbose -init_hw_device 
opencl=ocl:1.0 -filter_hw_device ocl -i 
".\test_01.mpg" -an -map_metadata -1 -sws_flags 
lanczos+accurate_rnd+full_chroma_int+full_chroma_inp -filter:v 
yadif=0:0:0,unsharp_opencl=lx=3:ly=3:la=0.5:cx=3:cy=3:ca=0.5,setdar=dar=16/9 
-r 25 -c:v h264_nvenc -preset slow -bf 2 -g 50 -refs 3 -rc:v 
vbr_hq -rc-lookahead:v 32 -cq 22 -qmin 16 -qmax 25 -coder cabac -movflags 
+faststart -profile:v high -level 4.1 -pixel_format yuv420p -y 
".\test_01.newest.MP4"

[AVHWDeviceContext @ 01b856bdb720] 1.0: NVIDIA CUDA / GeForce GTX 750 Ti
[AVHWDeviceContext @ 01b856bdb720] DXVA2 to OpenCL mapping function 
found (clCreateFromDX9MediaSurfaceKHR).
[AVHWDeviceContext @ 01b856bdb720] DXVA2 in OpenCL acquire function 
found (clEnqueueAcquireDX9MediaSurfacesKHR).
[AVHWDeviceContext @ 01b856bdb720] DXVA2 in OpenCL release function 
found (clEnqueueReleaseDX9MediaSurfacesKHR).
[AVHWDeviceContext @ 01b856bdb720] The cl_khr_d3d11_sharing extension is 
required for D3D11 to OpenCL mapping.

[AVHWDeviceContext @ 01b856bdb720] D3D11 to OpenCL mapping not usable.
[mpeg @ 01b856bde0e0] max_analyze_duration 500 reached at 500 
microseconds st:0

Input #0, mpeg, from '.\test_01.mpg':
 Duration: 00:06:29.96, start: 0.24, bitrate: 2799 kb/s
   Stream #0:0[0x1e0]: Video: mpeg2video (Main), 1 reference frame, 
yuv420p(tv, top first, left), 720x576 [SAR 64:45 DAR 16:9], 25 fps, 25 tbr, 
90k tbn, 50 tbc

   Stream #0:1[0x1c0]: Audio: mp2, 48000 Hz, stereo, s16p, 256 kb/s
Stream mapping:
 Stream #0:0 -> #0:0 (mpeg2video (native) -> h264 (h264_nvenc))
Press [q] to stop, [?] for help
[graph 0 input from stream 0:0 @ 01b856c0cfc0] w:720 h:576 
pixfmt:yuv420p tb:1/9 fr:25/1 sar:64/45 sws_param:flags=2

[auto_scaler_0 @ 01b856c0db20] w:iw h:ih flags:'bicubic' interl:0
[Parsed_unsharp_opencl_1 @ 01b856c0d160] auto-inserting filter 
'auto_scaler_0' between the filter 'Parsed_yadif_0' and the filter 
'Parsed_unsharp_opencl_1'
Impossible to convert between the formats supported by the filter 
'Parsed_yadif_0' and the filter 'auto_scaler_0'

Error reinitializing filters!
Failed to inject frame into filter network: Function not implemented
Error while processing the decoded data for stream #0:0
Conversion failed!

".\ffmpeg_3.latest_master.exe" -hide_banner -v verbose -init_hw_device 
opencl=ocl:1.0 -filter_hw_device ocl -i 
".\test_01.mpg" -an -map_metadata -1 -sws_flags 
lanczos+accurate_rnd+full_chroma_int+full_chroma_inp -filter:v 
yadif=0:0:0,format=pix_fmts=yuv420p,unsharp_opencl=lx=3:ly=3:la=0.5:cx=3:cy=3:ca=0.5,setdar=dar=16/9 
-r 25 -c:v h264_nvenc -preset slow -bf 2 -g 50 -refs 3 -rc:v 
vbr_hq -rc-lookahead:v 32 -cq 22 -qmin 16 -qmax 25 -coder cabac -movflags 
+faststart -profile:v high -level 4.1 -pixel_format yuv420p -y 
".\test_01.newest.MP4"

[AVHWDeviceContext @ 01fc26abb760] 1.0: NVIDIA CUDA / GeForce GTX 750 Ti
[AVHWDeviceContext @ 01fc26abb760] DXVA2 to OpenCL mapping function 
found (clCreateFromDX9MediaSurfaceKHR).
[AVHWDeviceContext @ 01fc26abb760] DXVA2 in OpenCL acquire function 
found (clEnqueueAcquireDX9MediaSurfacesKHR).
[AVHWDeviceContext @ 01fc26abb760] DXVA2 in OpenCL release function 
found (clEnqueueReleaseDX9MediaSurfacesKHR).
[AVHWDeviceContext @ 01fc26abb760] The cl_khr_d3d11_sharing extension is 
required for D3D11 to OpenCL mapping.

[AVHWDeviceContext @ 01fc26abb760] D3D11 to OpenCL mapping not usable.
[mpeg @ 01fc26abe120] max_analyze_duration 500 reached at 500 
microseconds st:0

Input #0, mpeg, from '.\test_01.mpg':
 Duration: 00:06:29.96, start: 0.24, bitrate: 2799 kb/s
   Stream #0:0[0x1e0]: Video: mpeg2video (Main), 1 reference frame, 
yuv420p(tv, top first, left), 720x576 [SAR 64:45 DAR 16:9], 25 fps, 25 tbr, 
90k tbn, 50 tbc

   Stream #0:1[0x1c0]: Audio: mp2, 48000 Hz, stereo, s16p, 256 kb/s
Stream mapping:
 Stream #0:0 -> #0:0 (mpeg2video (native) -> h264 (h264_nvenc))
Press [q] to stop, [?] for help
[graph 0 input from stream 0:0 @ 01fc2d9fe120] w:720 h:576 
pixfmt:yuv420p tb:1/9 fr:25/1 sar:64/45 sws_param:flags=2

[auto_scaler_0 @ 01fc2d9fe940] w:iw h:ih flags:'bicubic' interl:0
[Parsed_unsharp_opencl_2 @ 01fc2d9fe6c0] auto-inserting filter 
'auto_scaler_0' between the filter 

Re: [FFmpeg-devel] [PATCH] avfilter/drawbox: rename variable for maximum thickness

2017-11-26 Thread Gyan Doshi


On 11/23/2017 7:12 PM, Gyan Doshi wrote:


On 11/20/2017 3:59 PM, Gyan Doshi wrote:
At present, the value name 'max' for maximum thickness in drawbox (and 
drawgrid) filter leads to a parse error if the thickness expression 
contains 'max(val1,val2)' i.e.


 [Eval @ ...] Invalid chars '(20,30)' at the end of expression 
'max(20,30)'


Renamed to 'fill'; tested & documented.


Ping. x2
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/4] lavc/parser: use C11 atomics

2017-11-26 Thread Rostislav Pehlivanov
Signed-off-by: Rostislav Pehlivanov 
---
 libavcodec/parser.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/libavcodec/parser.c b/libavcodec/parser.c
index 670680ea7c..baf1de4d88 100644
--- a/libavcodec/parser.c
+++ b/libavcodec/parser.c
@@ -23,30 +23,32 @@
 #include <inttypes.h>
 #include <stdint.h>
 #include <string.h>
+#include <stdatomic.h>
 
 #include "libavutil/avassert.h"
-#include "libavutil/atomic.h"
 #include "libavutil/internal.h"
 #include "libavutil/mem.h"
 
 #include "internal.h"
 #include "parser.h"
 
-static AVCodecParser *av_first_parser = NULL;
+static _Atomic(AVCodecParser *)av_first_parser = NULL;
 
 AVCodecParser *av_parser_next(const AVCodecParser *p)
 {
 if (p)
 return p->next;
 else
-return av_first_parser;
+return atomic_load(&av_first_parser);
 }
 
 void av_register_codec_parser(AVCodecParser *parser)
 {
+AVCodecParser *old_parser;
 do {
-parser->next = av_first_parser;
-} while (parser->next != avpriv_atomic_ptr_cas((void * volatile 
*)&av_first_parser, parser->next, parser));
+parser->next = old_parser = atomic_load(&av_first_parser);
+atomic_compare_exchange_strong(&av_first_parser, &old_parser, parser);
+} while (parser->next != old_parser);
 }
 
 AVCodecParserContext *av_parser_init(int codec_id)
@@ -58,7 +60,7 @@ AVCodecParserContext *av_parser_init(int codec_id)
 if (codec_id == AV_CODEC_ID_NONE)
 return NULL;
 
-for (parser = av_first_parser; parser; parser = parser->next) {
+for (parser = atomic_load(&av_first_parser); parser; parser = 
parser->next) {
 if (parser->codec_ids[0] == codec_id ||
 parser->codec_ids[1] == codec_id ||
 parser->codec_ids[2] == codec_id ||
-- 
2.15.0.417.g466bffb3ac

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 4/4] lavc/utils: simplify codec registration

2017-11-26 Thread Rostislav Pehlivanov
Same as last 2 commits.

Signed-off-by: Rostislav Pehlivanov 
---
 libavcodec/utils.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index baf09119fe..de2dcea54d 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -185,8 +185,11 @@ av_cold void avcodec_register(AVCodec *codec)
 p = last_avcodec;
 codec->next = NULL;
 
-while(*p || avpriv_atomic_ptr_cas((void * volatile *)p, NULL, codec))
-p = &(*p)->next;
+/* Iterate through the list until the last entry has been reached */
+do {
+*p = codec;
+p  = &(codec)->next;
+} while (*p);
 last_avcodec = &codec->next;
 
 if (codec->init_static_data)
-- 
2.15.0.417.g466bffb3ac

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/4] lavfi/avfilter: simplify filter registration

2017-11-26 Thread Rostislav Pehlivanov
Atomics were entirely pointless here and did nothing but slow down and
complicate the process. This could be improved further still, but the main
objective of this commit is to simplify.

Signed-off-by: Rostislav Pehlivanov 
---
 libavfilter/avfilter.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index b98b32bacb..6b98e77a8e 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -19,7 +19,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/atomic.h"
 #include "libavutil/avassert.h"
 #include "libavutil/avstring.h"
 #include "libavutil/buffer.h"
@@ -599,8 +598,11 @@ int avfilter_register(AVFilter *filter)
 
 filter->next = NULL;
 
-while(*f || avpriv_atomic_ptr_cas((void * volatile *)f, NULL, filter))
-f = &(*f)->next;
+/* Iterate through the list until the last entry has been reached */
+do {
+*f = filter;
+f  = &filter->next;
+} while (*f);
 last_filter = &filter->next;
 
 return 0;
-- 
2.15.0.417.g466bffb3ac

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 3/4] lavf/format: simplify I/O registration

2017-11-26 Thread Rostislav Pehlivanov
Same as previous commit, unneeded use of atomics.

Signed-off-by: Rostislav Pehlivanov 
---
 libavformat/format.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/libavformat/format.c b/libavformat/format.c
index 38ca2a3465..6ac7349ec5 100644
--- a/libavformat/format.c
+++ b/libavformat/format.c
@@ -19,7 +19,6 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavutil/atomic.h"
 #include "libavutil/avstring.h"
 #include "libavutil/bprint.h"
 #include "libavutil/opt.h"
@@ -62,9 +61,11 @@ void av_register_input_format(AVInputFormat *format)
 {
 AVInputFormat **p = last_iformat;
 
-// Note, format could be added after the first 2 checks but that implies 
that *p is no longer NULL
-while(p != &format->next && !format->next && avpriv_atomic_ptr_cas((void * 
volatile *)p, NULL, format))
-p = &(*p)->next;
+/* Iterate through the list until the last entry has been reached */
+while (p != &format->next && !format->next) {
+*p = format;
+p  = &(format)->next;
+}
 
 if (!format->next)
 last_iformat = &format->next;
@@ -74,9 +75,11 @@ void av_register_output_format(AVOutputFormat *format)
 {
 AVOutputFormat **p = last_oformat;
 
-// Note, format could be added after the first 2 checks but that implies 
that *p is no longer NULL
-while(p != &format->next && !format->next && avpriv_atomic_ptr_cas((void * 
volatile *)p, NULL, format))
-p = &(*p)->next;
+/* Iterate through the list until the last entry has been reached */
+while (p != &format->next && !format->next) {
+*p = format;
+p  = &(format)->next;
+}
 
 if (!format->next)
 last_oformat = &format->next;
-- 
2.15.0.417.g466bffb3ac

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH]lavf/mov: Do not blindly allocate stts entries

2017-11-26 Thread Carl Eugen Hoyos
Hi!

Attached patch avoids allocations >1GB for (short and) invalid mov
files with only reasonable speed impact.

Please review, Carl Eugen
From 0d243bad5fdd9850ff41d49a32a06274a3cd9756 Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos 
Date: Mon, 27 Nov 2017 05:13:25 +0100
Subject: [PATCH] lavf/mov: Do not blindly allocate huge memory blocks for
 stts entries.

Fixes large allocations for short files with invalid stts entry.
Fixes bugzilla 1102.
---
 libavformat/mov.c |   16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index ddb1e59..9d353bf 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -2838,14 +2838,24 @@ static int mov_read_stts(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 if (sc->stts_data)
 av_log(c->fc, AV_LOG_WARNING, "Duplicated STTS atom\n");
 av_free(sc->stts_data);
-sc->stts_count = 0;
-sc->stts_data = av_malloc_array(entries, sizeof(*sc->stts_data));
+sc->stts_count = FFMIN(1024 * 1024, entries);
+sc->stts_data = av_realloc_array(NULL, sc->stts_count, sizeof(*sc->stts_data));
 if (!sc->stts_data)
 return AVERROR(ENOMEM);
 
 for (i = 0; i < entries && !pb->eof_reached; i++) {
-int sample_duration;
+int sample_duration, ret;
 unsigned int sample_count;
+if (i > sc->stts_count) {
+ret = av_reallocp_array(&sc->stts_data,
+FFMIN(sc->stts_count * 2LL, entries),
+sizeof(*sc->stts_data));
+if (ret < 0) {
+sc->stts_count = 0;
+return ret;
+}
+sc->stts_count = FFMIN(sc->stts_count * 2, entries);
+}
 
 sample_count=avio_rb32(pb);
 sample_duration = avio_rb32(pb);
-- 
1.7.10.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] avcodec/hevc_ps: set correct return value

2017-11-26 Thread Carl Eugen Hoyos
2017-11-27 4:22 GMT+01:00 Pan Bian :
> When the call to get_bits_left() fails, the return value is not a
> negative error code. This patch explicitly assigns an error
> code to the return variable ret.

Can't this break decoding of damaged streams that could be
decoded so far?

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread Carl Eugen Hoyos
2017-11-27 4:24 GMT+01:00 James Almer :
> On 11/27/2017 12:17 AM, Carl Eugen Hoyos wrote:

>> That's completely apart from the fact that this header file does
>> not comply with any style guide while Nvidia's does (from a
>> very quick look at both files).
>
> That would be because the Nvidia one was written/adapted by
> a ffmpeg developer, who probably paid attention to that.

(How is that related?)

> We can ask Mironov to improve the style in the header he
> submitted if that's important.

Not if you feel it is not important.

If everybody wants this header, I will try hard to abstain from
this discussion from now on.

Sorry, Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] AMD external header

2017-11-26 Thread Carl Eugen Hoyos
2017-11-27 4:00 GMT+01:00 Mironov, Mikhail :
>> -Original Message-
>> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf
>> Of Carl Eugen Hoyos
>> Sent: November 26, 2017 9:36 PM
>> To: FFmpeg development discussions and patches > de...@ffmpeg.org>
>> Subject: Re: [FFmpeg-devel] AMD external header
>>
>> 2017-11-27 3:32 GMT+01:00 Mironov, Mikhail
>> :
>> >> -Original Message-
>> >> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On
>> Behalf
>> >> Of Carl Eugen Hoyos
>>
>> >> Also, imo you have not really explained which users would have an
>> >> advantage if FFmpeg includes the AMD header.
>> >
>> > It is more a question to people who included NVidia headers.
>>
>> I don't think my question can be answered by Nvidia users or developers.
>> You are the exclusive recipient for my question.
>>
>> > It was for convenience, isn't it?
>>
>> That is possible and I assume you want the AMD headers added for
>> convenience but that wasn't my question.
>
> In my many years of using FFmpeg as a library and as a tool I saw a lot of 
> people who build FFmpeg
> on their own as I do. One, but not all reasons for a custom build is desire to 
> debug whole application
> with one tool and on Windows many people still use Visual Studio so they make 
> a build with VS,
> which is not available in places like zeranoe.
> Another reason is to make minimum feature set needed to their app.

Thank you for the useful explanation!

> At the same time for many real-time applications a software encoder is not an 
> option.

> So these developers will appreciate default support for both GPUs.

But if all above only applies to developers (and not to users) I still
believe it is not necessary to add the header, simply because
every developer will manage to download a header from your
github page.

[...]

> What about AMD? We also have them.

I believe Hendrik answered to that argument some time ago,
others disagreed.

I can only state that the more often you repeat it, the stronger I
feel urged to request a code cleanup including documentation
for the header before it gets added.

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread James Almer
On 11/27/2017 12:17 AM, Carl Eugen Hoyos wrote:
> 2017-11-27 4:00 GMT+01:00 James Almer :
>> On 11/26/2017 11:57 PM, Carl Eugen Hoyos wrote:
>>> 2017-11-27 3:42 GMT+01:00 James Almer :
>>>
>>>> No comments about the code, but given this patchset has started a policy
>>>> controversy I'll state I'm in favor of including this external header.
>>>
>>> Will the header work for operating systems other than Windows?
>>
>> Why are you asking me? I'm not the author of the patch.
>>
>> And a quick look at the configure change should answer your question.
> 
> What I meant was:
> The header currently does not help a relevant number of users
> (it would make Zeranoe's and Hendrik's life a little easier, that's
> all). In the future, it will be useful because AMD plans to provide
> a Linux driver. But I have a feeling that the current header will
> not work for this future driver, meaning adding the header now
> may be counter-productive.

It can easily be adapted for that alongside the configure checks, I'm
sure, much like the Nvidia one was for different needs.

> 
> That's completely apart from the fact that this header file does
> not comply with any style guide while Nvidia's does (from a
> very quick look at both files).

That would be because the Nvidia one was written/adapted by a ffmpeg
developer, who probably paid attention to that.
We can ask Mironov to improve the style in the header he submitted if
that's important.

The avxsynth headers don't follow any style guide either, for that
matter. Indentation is all over the place.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread Mironov, Mikhail
> -Original Message-
> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf
> Of Carl Eugen Hoyos
> Sent: November 26, 2017 10:20 PM
> To: FFmpeg development discussions and patches  de...@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD
> GPUs based on AMF SDK
> 
> 2017-11-27 4:06 GMT+01:00 Mironov, Mikhail
> :
> 
> > I think as a side effect we can help integrate Vulkan acceleration to 
> > FFmpeg.
> > It is much better than OpenCL for multimedia from performance
> perspective.
> 
> Why did I so strongly expect this argument?
> (I have neither ever used OpenCL nor Vulkan.)
> 
> Others may (and hopefully do) disagree but I believe you should really
> change your argumentation.
> 

I stated that this is a side note, not an argument.
Thanks,
Mikhail
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] AMD external header

2017-11-26 Thread Mironov, Mikhail
> 
> Nobody is against adding AMD specific encoders and hwaccels. The issue for
> some seems to be the inclusion of external headers.
> 
> Currently, nothing in our policy is against it, and as I and Philip already
> stated, your additions shouldn't be gated on a potential future policy change
> about bundled headers, so your patch can initially go in with the header.

I sent this email in response to Mark's who stated that he will not include the 
header. 
I understand the delayed decision about external headers and will be OK with 
any 
outcome as long as it is universal.

> 
> > I also noticed that recently there is a lot of activity to add full Nvidia
> decoders  to FFmpeg (VC1, MPEG4, VP8, VP9 etc.).
> > I am guessing this is to overcome DXVA or VAAPI shortcoming. What about
> AMD? We also have them.
> 
> Simply put, those developers wanted to write those features.
> 
> dxva2 and d3d11va work just fine with AMD hardware, for that matter. And
> nobody stops you or anyone to write similar hwaccels for AMD specific APIs.

If it makes sense I would like to add decoders as well as accelerated converter 
as soon as encoder is in.

Thanks,
Mikhail
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] avcodec/hevc_ps: set correct return value

2017-11-26 Thread Pan Bian
When the call to get_bits_left() fails, the return value is not a
negative error code. This patch explicitly assigns an error code to the
return variable ret.

Signed-off-by: Pan Bian 
---
 libavcodec/hevc_ps.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
index a4f7ed6..f87a577 100644
--- a/libavcodec/hevc_ps.c
+++ b/libavcodec/hevc_ps.c
@@ -1691,6 +1691,7 @@ int ff_hevc_decode_nal_pps(GetBitContext *gb, 
AVCodecContext *avctx,
 if (get_bits_left(gb) < 0) {
 av_log(avctx, AV_LOG_ERROR,
"Overread PPS by %d bits\n", -get_bits_left(gb));
+ret = AVERROR_INVALIDDATA;
 goto err;
 }
 
-- 
1.9.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread Carl Eugen Hoyos
2017-11-27 4:06 GMT+01:00 Mironov, Mikhail :

> I think as a side effect we can help integrate Vulkan acceleration to FFmpeg.
> It is much better than OpenCL for multimedia from performance perspective.

Why did I so strongly expect this argument?
(I have neither ever used OpenCL nor Vulkan.)

Others may (and hopefully do) disagree but I believe
you should really change your argumentation.

Sorry, Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread Carl Eugen Hoyos
2017-11-27 4:00 GMT+01:00 James Almer :
> On 11/26/2017 11:57 PM, Carl Eugen Hoyos wrote:
>> 2017-11-27 3:42 GMT+01:00 James Almer :
>>
>>> No comments about the code, but given this patchset has started a policy
>>> controversy I'll state I'm in favor of including this external header.
>>
>> Will the header work for operating systems other than Windows?
>
> Why are you asking me? I'm not the author of the patch.
>
> And a quick look at the configure change should answer your question.

What I meant was:
The header currently does not help a relevant number of users
(it would make Zeranoe's and Hendrik's life a little easier, that's
all). In the future, it will be useful because AMD plans to provide
a Linux driver. But I have a feeling that the current header will
not work for this future driver, meaning adding the header now
may be counter-productive.

That's completely apart from the fact that this header file does
not comply with any style guide while Nvidia's does (from a
very quick look at both files).

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [FFmpeg-devel 2/2] avformat/mov: return correct value in mov_read_cmov

2017-11-26 Thread Pan Bian
On some failure paths, the error code is not correctly set.

Signed-off-by: Pan Bian 
---
 libavformat/mov.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index d25071b..bbf550f 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -4895,6 +4895,7 @@ static int mov_read_cmov(MOVContext *c, AVIOContext *pb, 
MOVAtom atom)
 if (ret < 0)
 goto free_and_return;
 
+ret = AVERROR_INVALIDDATA;
 if (uncompress (moov_data, (uLongf *) &moov_len, (const Bytef *)cmov_data, 
cmov_len) != Z_OK)
 goto free_and_return;
 if (ffio_init_context(&ctx, moov_data, moov_len, 0, NULL, NULL, NULL, 
NULL) != 0)
-- 
1.9.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [FFmpeg-devel 1/2] avformat/mov: set correct error code in mov_read_custom

2017-11-26 Thread Pan Bian
In function mov_read_custom(), it returns 0 on the path that av_malloc()
returns a NULL pointer. 0 indicates success. An error code should be
assigned to ret.

Signed-off-by: Pan Bian 
---
 libavformat/mov.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavformat/mov.c b/libavformat/mov.c
index ddb1e59..d25071b 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -4177,8 +4177,10 @@ static int mov_read_custom(MOVContext *c, AVIOContext 
*pb, MOVAtom atom)
 break;
 
 *p = av_malloc(len + 1);
-if (!*p)
+if (!*p) {
+ret = AVERROR(ENOMEM);
 break;
+}
 ret = ffio_read_size(pb, *p, len);
 if (ret < 0) {
 av_freep(p);
-- 
1.9.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] AMD external header

2017-11-26 Thread James Almer
On 11/27/2017 12:00 AM, Mironov, Mikhail wrote:
>> -Original Message-
>> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf
>> Of Carl Eugen Hoyos
>> Sent: November 26, 2017 9:36 PM
>> To: FFmpeg development discussions and patches > de...@ffmpeg.org>
>> Subject: Re: [FFmpeg-devel] AMD external header
>>
>> 2017-11-27 3:32 GMT+01:00 Mironov, Mikhail
>> :
>>>> -Original Message-
>>>> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On
>> Behalf
>>>> Of Carl Eugen Hoyos
>>
>>>> Also, imo you have not really explained which users would have an
>>>> advantage if FFmpeg includes the AMD header.
>>>
>>> It is more a question to people who included NVidia headers.
>>
>> I don't think my question can be answered by Nvidia users or developers.
>> You are the exclusive recipient for my question.
>>
>>> It was for convenience, isn't it?
>>
>> That is possible and I assume you want the AMD headers added for
>> convenience but that wasn't my question.
> 
> In my many years of using FFmpeg as a library and as a tool I saw a lot of 
> people who build FFmpeg 
> on their own as I do. One, but not all reasons for a custom build is desire to 
> debug whole application 
> with one tool and on Windows many people still use Visual Studio so they make 
> a build with VS, 
> which is not available in places like zeranoe. 
> Another reason is to make minimum feature set needed to their app.
> At the same time for many real-time applications a software encoder is not an 
> option. 
> So these developers will appreciate default support for both GPUs. Everything 
> related to 
> game streaming (OBS), DVR, cloud gaming, wireless virtual reality etc. are 
> use cases and real users behind.
> Again we are talking about ease of access, not about impossibility.
> I would like to see that people set few parameters and encoding would happen 
> without additional going through hoops.

Nobody is against adding AMD specific encoders and hwaccels. The issue
for some seems to be the inclusion of external headers.

Currently, nothing in our policy is against it, and as I and Philip
already stated, your additions shouldn't be gated on a potential future
policy change about bundled headers, so your patch can initially go in
with the header.

> I also noticed that recently there is a lot of activity to add full Nvidia 
> decoders  to FFmpeg (VC1, MPEG4, VP8, VP9 etc.). 
> I am guessing this is to overcome DXVA or VAAPI shortcoming. What about AMD? 
> We also have them.

Simply put, those developers wanted to write those features.

dxva2 and d3d11va work just fine with AMD hardware, for that matter. And
nobody stops you or anyone to write similar hwaccels for AMD specific APIs.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] lavc: reset codec on receiving packet after EOF in compat_decode

2017-11-26 Thread Michael Niedermayer
On Sun, Nov 26, 2017 at 12:09:35PM -0300, James Almer wrote:
> On 11/21/2017 6:48 PM, Marton Balint wrote:
> > 
> > 
> > On Thu, 9 Nov 2017, James Cowgill wrote:
> > 
> >> Hi,
> >>
> >> On 09/11/17 14:02, Hendrik Leppkes wrote:
> >>> On Thu, Nov 9, 2017 at 1:21 PM, James Cowgill 
> >>> wrote:
>  In commit 061a0c14bb57 ("decode: restructure the core decoding
>  code"), the
>  deprecated avcodec_decode_* APIs were reworked so that they called
>  into the
>  new avcodec_send_packet / avcodec_receive_frame API. This had the
>  side effect
>  of prohibiting sending new packets containing data after a drain
>  packet, but in previous versions of FFmpeg this "worked" and some
>  applications relied on it.
> 
>  To restore some compatibility, reset the codec if we receive a new
>  non-drain
>  packet using the old API after draining has completed. While this does
>  not give the same behaviour as the old API did, in the majority of
>  cases
>  it works and it does not require changes to any other part of the
>  decoding
>  code.
> 
>  Fixes ticket #6775
>  Signed-off-by: James Cowgill 
>  ---
>   libavcodec/decode.c | 5 +
>   1 file changed, 5 insertions(+)
> 
>  diff --git a/libavcodec/decode.c b/libavcodec/decode.c
>  index 86fe5aef52..2f1932fa85 100644
>  --- a/libavcodec/decode.c
>  +++ b/libavcodec/decode.c
>  @@ -726,6 +726,11 @@ static int compat_decode(AVCodecContext *avctx,
>  AVFrame *frame,
> 
>   av_assert0(avci->compat_decode_consumed == 0);
> 
>  +    if (avci->draining_done && pkt && pkt->size != 0) {
>  +    av_log(avctx, AV_LOG_WARNING, "Got unexpected packet after
>  EOF\n");
>  +    avcodec_flush_buffers(avctx);
>  +    }
>  +
> >>>
> >>> I don't think this is a good idea. Draining and not flushing
> >>> afterwards is a bug in the calling code, and even before recent
> >>> changes it would result in inconsistent behavior and even crashes
> >>> (with select decoders).
> >>
> >> I am fully aware that this will only trigger if the calling code is
> >> buggy. I am trying to avoid silent breakage of those applications doing
> >> this when upgrading to ffmpeg 3.4.
> >>
> >> I was looking at the documentation of avcodec_decode_* recently because
> >> of this and I had some trouble deciding if using the API this way was
> >> incorrect. I expect the downstreams affected thought that what they were
> >> doing was fine and then got angry when ffmpeg suddenly "broke" their
> >> code. This patch at least allows some sort of "transitional period"
> >> until downstreams update.
> > 
> > I think the intent was to flush the codec by passing the NULL packets to
> > it, so it makes a lot of sense to actually do that. Especially since by
> > implicitly doing a flush, we can avoid the undefined behaviour/crashes
> > on the codec side.
> > 
> > Also this is only compatibility code, which probably will be removed at
> > the next bump, I see no harm in making it as compatible as realistically
> > possible.
> 
> The old decode API is not scheduled for removal right now probably
> because 99% of decoders need to be ported.
> This compat code was written so the old API becomes a wrapper for the
> new rather than the other way around, as it was up to 3.3. Supposedly a
> good portion of the versatility of the new API would be handicapped
> otherwise.
> 

> Personally, I think this should be left as is. It is a good incentive
> for downstream to migrate to the new API, as they technically were
> misusing the old API to begin with.

providing compatibility support for an old API that does not actually
work with how applications used the old API is something a tad bit
bizarre

We want to minimize the work everyone has to do.
The more time people have, the more they can spend on improving free
software.

If the old API is going to be removed, the more work people have to do
to hunt and track implementation changes in our old API, the more
time they have wasted.
If you want people to spend their time on the new API, then you
should not introduce issues in the old API that they need to
work around.
That way they just lose time (debug, fix, test) they could have spent
on the new API or on anything else



> Between fixing their old API usage and migrating, the choice should be
> obvious.
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Asymptotically faster algorithms should always be preferred if you have
asymptotical amounts of data


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread Mironov, Mikhail
> -Original Message-
> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf
> Of Carl Eugen Hoyos
> Sent: November 26, 2017 9:57 PM
> To: FFmpeg development discussions and patches  de...@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD
> GPUs based on AMF SDK
> 
> 2017-11-27 3:42 GMT+01:00 James Almer :
> 
> > No comments about the code, but given this patchset has started a
> > policy controversy I'll state I'm in favor of including this external 
> > header.
> 
> Will the header work for operating systems other than Windows?

We are working on Linux version. Before I started FFmpeg integration 
I convinced the management to allocate resources.
It will come in two phases: 
1. AMF via Vulkan on Windows
2. AMF via Vulkan on Linux
I think as a side effect we can help integrate Vulkan acceleration to FFmpeg. 
It is much better than OpenCL for multimedia from performance perspective.

Thanks,
Mikhail
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread Carl Eugen Hoyos
2017-11-27 3:48 GMT+01:00 Philip Langdale :

[...]

Unrelated to this topic:
Please cut your quotes, I believe this is not the first message
where your content is very difficult to find.

Thank you, Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] AMD external header

2017-11-26 Thread Mironov, Mikhail
> -Original Message-
> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf
> Of Carl Eugen Hoyos
> Sent: November 26, 2017 9:36 PM
> To: FFmpeg development discussions and patches  de...@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] AMD external header
> 
> 2017-11-27 3:32 GMT+01:00 Mironov, Mikhail
> :
> >> -Original Message-
> >> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On
> Behalf
> >> Of Carl Eugen Hoyos
> 
> >> Also, imo you have not really explained which users would have an
> >> advantage if FFmpeg includes the AMD header.
> >
> > It is more a question to people who included NVidia headers.
> 
> I don't think my question can be answered by Nvidia users or developers.
> You are the exclusive recipient for my question.
> 
> > It was for convenience, isn't it?
> 
> That is possible and I assume you want the AMD headers added for
> convenience but that wasn't my question.

In my many years of using FFmpeg as a library and as a tool I saw a lot of 
people who build FFmpeg 
on their own as I do. One, but not all reasons for a custom build is desire to 
debug whole application 
with one tool and on Windows many people still use Visual Studio so they make a 
build with VS, 
which is not available in places like zeranoe. 
Another reason is to make minimum feature set needed to their app.
At the same time for many real-time applications a software encoder is not an 
option. 
So these developers will appreciate default support for both GPUs. Everything 
related to 
game streaming (OBS), DVR, cloud gaming, wireless virtual reality etc. are use 
cases and real users behind.
Again we are talking about ease of access, not about impossibility.
I would like to see that people set few parameters and encoding would happen 
without additional going through hoops.

I also noticed that recently there is a lot of activity to add full Nvidia 
decoders  to FFmpeg (VC1, MPEG4, VP8, VP9 etc.). 
I am guessing this is to overcome DXVA or VAAPI shortcoming. What about AMD? We 
also have them.

Thanks,
Mikhail
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread James Almer
On 11/26/2017 11:57 PM, Carl Eugen Hoyos wrote:
> 2017-11-27 3:42 GMT+01:00 James Almer :
> 
>> No comments about the code, but given this patchset has started a policy
>> controversy I'll state I'm in favor of including this external header.
> 
> Will the header work for operating systems other than Windows?

Why are you asking me? I'm not the author of the patch.

And a quick look at the configure change should answer your question.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread Carl Eugen Hoyos
2017-11-27 3:42 GMT+01:00 James Almer :

> No comments about the code, but given this patchset has started a policy
> controversy I'll state I'm in favor of including this external header.

Will the header work for operating systems other than Windows?

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread James Almer
On 11/26/2017 11:36 PM, mmironov wrote:
> From 21d99252fad543d3d27a015912c0458b6ae11e08 Mon Sep 17 00:00:00 2001
> From: mmironov 
> Date: Tue, 14 Nov 2017 17:54:24 -0500
> Subject: [PATCH] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF
>  SDK
> 
> Signed-off-by: mmironov 
> ---
>  Changelog|1 +
>  compat/amd/amfsdkenc.h   | 1755 
> ++
>  configure|   20 +-
>  libavcodec/Makefile  |4 +
>  libavcodec/allcodecs.c   |2 +
>  libavcodec/amfenc.c  |  602 
>  libavcodec/amfenc.h  |  143 
>  libavcodec/amfenc_h264.c |  397 +++
>  libavcodec/amfenc_hevc.c |  327 +
>  9 files changed, 3249 insertions(+), 2 deletions(-)
>  create mode 100644 compat/amd/amfsdkenc.h
>  create mode 100644 libavcodec/amfenc.c
>  create mode 100644 libavcodec/amfenc.h
>  create mode 100644 libavcodec/amfenc_h264.c
>  create mode 100644 libavcodec/amfenc_hevc.c

No comments about the code, but given this patchset has started a policy
controversy I'll state I'm in favor of including this external header.

Not shipping this one for having a working external version but shipping
the Nvidia one for being a custom working version of the unusable shit
they made public is rewarding them for said user unfriendliness, as Mark
clearly stated in a previous thread.
So lets add this one, then figure out if we want any of them at all or
not at a later point. They all can be moved to some external repository
if needed.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] AMD external header

2017-11-26 Thread Carl Eugen Hoyos
2017-11-27 3:32 GMT+01:00 Mironov, Mikhail :
>> -Original Message-
>> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf
>> Of Carl Eugen Hoyos

>> Also, imo you have not really explained which users would have an
>> advantage if FFmpeg includes the AMD header.
>
> It is more a question to people who included NVidia headers.

I don't think my question can be answered by Nvidia users
or developers. You are the exclusive recipient for my question.

> It was for convenience, isn't it?

That is possible and I assume you want the AMD headers added
for convenience but that wasn't my question.

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] AMD external header

2017-11-26 Thread Mironov, Mikhail
> -Original Message-
> From: ffmpeg-devel [mailto:ffmpeg-devel-boun...@ffmpeg.org] On Behalf
> Of Carl Eugen Hoyos
> Sent: November 26, 2017 9:25 PM
> To: FFmpeg development discussions and patches  de...@ffmpeg.org>
> Subject: Re: [FFmpeg-devel] AMD external header
> 
> 2017-11-27 3:15 GMT+01:00 Mironov, Mikhail
> :
> 
> > I will skip all arguments, you already read them.
> 
> That's great, but I believe you forgot to add the fourth option.

I just wanted to start a discussion. Please state what the fourth option is.
Though, I put it as a policy which is slightly different from option. 

> 
> Also, imo you have not really explained which users would have an
> advantage if FFmpeg includes the AMD header.

It is more a question to people who included NVidia headers. It was for 
convenience, isn't it?
Are you saying that there is a difference and it is based on easy access of the 
header?
> 
> Carl Eugen
> (who has never used any hardware encoder)
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Thanks,
Mikhail
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] AMD external header

2017-11-26 Thread Carl Eugen Hoyos
2017-11-27 3:15 GMT+01:00 Mironov, Mikhail :

> I will skip all arguments, you already read them.

That's great, but I believe you forgot to add the fourth option.

Also, imo you have not really explained which users
would have an advantage if FFmpeg includes the AMD header.

Carl Eugen
(who has never used any hardware encoder)
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread Mironov, Mikhail
> 
> A few minor fixups below.  I would be happy to apply this if it didn't contain
> the external header.
> 
> Thanks,
> 
> - Mark
> 
> 

I will resubmit with the changes you mentioned. 
As for the header inclusion issue, I've sent a separate email.

Thanks,
Mikhail

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] AMD external header

2017-11-26 Thread Mironov, Mikhail
Hi,
I would like to summarize thoughts on several threads on this forum related 
to the issue of including AMD/AMF header file into FFmpeg source tree. 
It looks like they reflect some policies formal or informal.
Mark tried to create some policy regarding this issue but wasn't successful. 
I believe a policy is always created to reach some goal. 
So my summary will be in form of triad: 
policy->goal->possible action
I will skip all arguments, you already read them.
#1
   policy: do not include external headers
   goal: minimize maintenance efforts and increase stability of the project
   action: remove NVidia headers
#2
   policy: keep certain headers in the tree based on some criteria
   goal: provide certain level of convenience for ordinary users
   action: include AMD header
#3
  policy: do whatever is needed to achieve the goal
  goal: achieve neutrality in relation to HW vendors
  action: remove NVidia headers or add AMD header

Since these policies contradict each other, some priorities should be set 
and I don't know how to do it. Personally I like #2 the most, but this is not 
my call.
But my point is that by keeping the NVidia headers in the tree and not allowing 
the AMD header, 
the FFmpeg development team breaks all three policies and does not achieve any goal.
If this is what you want, maybe you should state this explicitly on the "About" 
page, 
as Mark suggested: 
"No external headers may be added to the ffmpeg tree, unless they are for 
AviSynth or Nvidia"
At least it will be clear for all users and developers. 
You may say that you will include only headers that are hard to get. But does 
that mean that AMD must 
obscure access to its headers for them to be included? I hope not.

Thanks,
Mikhail

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH]lavc/gifdec: Do not error out on resolution bigger than screen size

2017-11-26 Thread Carl Eugen Hoyos
Hi!

Attached patch fixes ticket #6874 for me.
I don't think it makes much sense to discuss what the specification
means with "logical screen size" and "raster screen size" and
"physical display": Not only do other decoders accept such files, our
decoder already contains the necessary code to crop the image.
I believe that it could at least be argued that the specification
allows such files.

Please comment, Carl Eugen
From 47f5d312461a0d30cd1e70d819ae1daefbb5eebb Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos 
Date: Mon, 27 Nov 2017 02:57:50 +0100
Subject: [PATCH] lavc/gifdec: Do not error out if resolution is bigger than
 screen dimension.

This is what other decoders do.

Fixes ticket #6874.
---
 libavcodec/gifdec.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/gifdec.c b/libavcodec/gifdec.c
index 2eeed4c..59d866b 100644
--- a/libavcodec/gifdec.c
+++ b/libavcodec/gifdec.c
@@ -179,11 +179,11 @@ static int gif_read_image(GifState *s, AVFrame *frame)
 }
 
 /* verify that all the image is inside the screen dimensions */
-if (!width || width > s->screen_width || left >= s->screen_width) {
+if (!width || left >= s->screen_width) {
 av_log(s->avctx, AV_LOG_ERROR, "Invalid image width.\n");
 return AVERROR_INVALIDDATA;
 }
-if (!height || height > s->screen_height || top >= s->screen_height) {
+if (!height || top >= s->screen_height) {
 av_log(s->avctx, AV_LOG_ERROR, "Invalid image height.\n");
 return AVERROR_INVALIDDATA;
 }
-- 
1.7.10.4

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] avcodec/nvenc: set correct error code

2017-11-26 Thread Pan Bian
In function process_output_surface(), the return value is 0 on the path
where av_mallocz() returns a NULL pointer. 0 indicates success, which
does not reflect the failure. Return "AVERROR(ENOMEM)" instead of "0".

Signed-off-by: Pan Bian 
---
 libavcodec/nvenc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 79f7dce..1506062 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -1763,8 +1763,10 @@ static int process_output_surface(AVCodecContext *avctx, 
AVPacket *pkt, NvencSur
 }
 slice_offsets = av_mallocz(slice_mode_data * sizeof(*slice_offsets));
 
-if (!slice_offsets)
+if (!slice_offsets) {
+res = AVERROR(ENOMEM);
 goto error;
+}
 
 lock_params.version = NV_ENC_LOCK_BITSTREAM_VER;
 
-- 
1.9.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] libavformat/rtpenc_mpegts: check avformat_new_stream() return value

2017-11-26 Thread Pan Bian
The function avformat_new_stream() returns a NULL pointer on failure.
However, in function rtp_mpegts_write_header(), its return value is not
validated before it is dereferenced. Check the return value against NULL
to avoid potential NULL dereference.

Signed-off-by: Pan Bian 
---
 libavformat/rtpenc_mpegts.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/libavformat/rtpenc_mpegts.c b/libavformat/rtpenc_mpegts.c
index 7af02e0..9089d61 100644
--- a/libavformat/rtpenc_mpegts.c
+++ b/libavformat/rtpenc_mpegts.c
@@ -85,6 +85,10 @@ static int rtp_mpegts_write_header(AVFormatContext *s)
 }
 rtp_ctx->oformat = rtp_format;
 st = avformat_new_stream(rtp_ctx, NULL);
+   if (!st) {
+ret = AVERROR(ENOMEM);
+goto fail;
+   }
 st->time_base.num   = 1;
 st->time_base.den   = 90000;
 st->codecpar->codec_id = AV_CODEC_ID_MPEG2TS;
-- 
1.9.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] ffmpeg: Check read_ffserver_streams() return value

2017-11-26 Thread Rostislav Pehlivanov
On 27 November 2017 at 01:12, Pan Bian  wrote:

> The function avformat_alloc_context() will return a NULL pointer on
> failure. However, in function read_ffserver_streams(), its return value
> is not validated and the subsequent dereference may result in a bad
> memory access bug. Check its return value against NULL and avoid
> potential NULL dereference.
>
> Signed-off-by: Pan Bian 
> ---
>  fftools/ffmpeg_opt.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
> index 9445a2d..7e51c5a 100644
> --- a/fftools/ffmpeg_opt.c
> +++ b/fftools/ffmpeg_opt.c
> @@ -2000,6 +2000,8 @@ static int read_ffserver_streams(OptionsContext *o,
> AVFormatContext *s, const ch
>  {
>  int i, err;
>  AVFormatContext *ic = avformat_alloc_context();
> +   if (!ic)
> +   return AVERROR(ENOMEM);
>
>  ic->interrupt_callback = int_cb;
>  err = avformat_open_input(&ic, filename, NULL, NULL);
> --
> 1.9.1
>
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

Pointless as ffserver's getting removed in a day or two
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] ffmpeg: Check read_ffserver_streams() return value

2017-11-26 Thread Pan Bian
The function avformat_alloc_context() will return a NULL pointer on
failure. However, in function read_ffserver_streams(), its return value
is not validated and the subsequent dereference may result in a bad
memory access bug. Check its return value against NULL and avoid
potential NULL dereference.

Signed-off-by: Pan Bian 
---
 fftools/ffmpeg_opt.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fftools/ffmpeg_opt.c b/fftools/ffmpeg_opt.c
index 9445a2d..7e51c5a 100644
--- a/fftools/ffmpeg_opt.c
+++ b/fftools/ffmpeg_opt.c
@@ -2000,6 +2000,8 @@ static int read_ffserver_streams(OptionsContext *o, 
AVFormatContext *s, const ch
 {
 int i, err;
 AVFormatContext *ic = avformat_alloc_context();
+   if (!ic)
+   return AVERROR(ENOMEM);
 
 ic->interrupt_callback = int_cb;
 err = avformat_open_input(&ic, filename, NULL, NULL);
-- 
1.9.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] avcodec/samidec: check av_strdup() return value

2017-11-26 Thread Pan Bian
From: Pan Bian 

In function sami_paragraph_to_ass(), the return value of av_strdup() is
not checked. To avoid potential NULL dereference, the return value
should be checked against NULL.

Signed-off-by: Pan Bian 
---
 libavcodec/samidec.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavcodec/samidec.c b/libavcodec/samidec.c
index 2620424..35fcc41 100644
--- a/libavcodec/samidec.c
+++ b/libavcodec/samidec.c
@@ -48,6 +48,9 @@ static int sami_paragraph_to_ass(AVCodecContext *avctx, const 
char *src)
 AVBPrint *dst_content = &sami->encoded_content;
 AVBPrint *dst_source = &sami->encoded_source;
 
+   if (!dupsrc)
+   return AVERROR(ENOMEM);
+
 av_bprint_clear(&sami->encoded_content);
 av_bprint_clear(&sami->content);
 av_bprint_clear(&sami->encoded_source);
-- 
1.9.1


___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF SDK

2017-11-26 Thread Mark Thompson
On 22/11/17 23:28, mmironov wrote:
> From c669277afd764903d3da09d92a263d0fb58e24b1 Mon Sep 17 00:00:00 2001
> From: mmironov 
> Date: Tue, 14 Nov 2017 17:54:24 -0500
> Subject: [PATCH] Added HW H.264 and HEVC encoding for AMD GPUs based on AMF
>  SDK
> 
> Signed-off-by: mmironov 
> ---
>  Changelog|1 +
>  compat/amd/amfsdkenc.h   | 1755 
> ++
>  configure|   18 +-
>  libavcodec/Makefile  |4 +
>  libavcodec/allcodecs.c   |2 +
>  libavcodec/amfenc.c  |  596 
>  libavcodec/amfenc.h  |  143 
>  libavcodec/amfenc_h264.c |  397 +++
>  libavcodec/amfenc_hevc.c |  327 +
>  9 files changed, 3242 insertions(+), 1 deletion(-)
>  create mode 100644 compat/amd/amfsdkenc.h
>  create mode 100644 libavcodec/amfenc.c
>  create mode 100644 libavcodec/amfenc.h
>  create mode 100644 libavcodec/amfenc_h264.c
>  create mode 100644 libavcodec/amfenc_hevc.c

A few minor fixups below.  I would be happy to apply this if it didn't contain 
the external header.

Thanks,

- Mark


> diff --git a/Changelog b/Changelog
> index 68829f2..e5e5ffd 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -15,6 +15,7 @@ version :
>  - Raw aptX muxer and demuxer
>  - NVIDIA NVDEC-accelerated H.264, HEVC and VP9 hwaccel decoding
>  - Intel QSV-accelerated overlay filter
> +- AMD NW H.264 and HEVC encoders

NW?

>  
>  
>  version 3.4:
> diff --git a/compat/amd/amfsdkenc.h b/compat/amd/amfsdkenc.h
> new file mode 100644
> index 0000000..282656d
> --- /dev/null
> +++ b/compat/amd/amfsdkenc.h
> @@ -0,0 +1,1755 @@
> ...
> diff --git a/configure b/configure
> index 3788f26..a562a2a 100755
> --- a/configure
> +++ b/configure
> @@ -303,6 +303,7 @@ External library support:
>--disable-zlib   disable zlib [autodetect]
>  
>The following libraries provide various hardware acceleration features:
> +  --disable-amfdisable AMF video encoding code [autodetect]
>--disable-audiotoolbox   disable Apple AudioToolbox code [autodetect]
>--disable-cuda   disable dynamically linked Nvidia CUDA code 
> [autodetect]
>--enable-cuda-sdkenable CUDA features that require the CUDA SDK 
> [no]
> @@ -1639,6 +1640,7 @@ EXTERNAL_LIBRARY_LIST="
>  "
>  
>  HWACCEL_AUTODETECT_LIBRARY_LIST="
> +amf
>  audiotoolbox
>  crystalhd
>  cuda
> @@ -2781,12 +2783,15 @@ scale_npp_filter_deps="cuda libnpp"
>  scale_cuda_filter_deps="cuda_sdk"
>  thumbnail_cuda_filter_deps="cuda_sdk"
>  
> +amf_deps_any="libdl LoadLibrary"
> +
>  nvenc_deps="cuda"
>  nvenc_deps_any="libdl LoadLibrary"
>  nvenc_encoder_deps="nvenc"
>  
>  h263_v4l2m2m_decoder_deps="v4l2_m2m h263_v4l2_m2m"
>  h263_v4l2m2m_encoder_deps="v4l2_m2m h263_v4l2_m2m"
> +h264_amf_encoder_deps="amf"
>  h264_crystalhd_decoder_select="crystalhd h264_mp4toannexb_bsf h264_parser"
>  h264_cuvid_decoder_deps="cuvid"
>  h264_cuvid_decoder_select="h264_mp4toannexb_bsf"
> @@ -2803,6 +2808,7 @@ 
> h264_vaapi_encoder_deps="VAEncPictureParameterBufferH264"
>  h264_vaapi_encoder_select="cbs_h264 vaapi_encode"
>  h264_v4l2m2m_decoder_deps="v4l2_m2m h264_v4l2_m2m"
>  h264_v4l2m2m_encoder_deps="v4l2_m2m h264_v4l2_m2m"
> +hevc_amf_encoder_deps="amf"
>  hevc_cuvid_decoder_deps="cuvid"
>  hevc_cuvid_decoder_select="hevc_mp4toannexb_bsf"
>  hevc_mediacodec_decoder_deps="mediacodec"
> @@ -6164,9 +6170,12 @@ if enabled x86; then
>  mingw32*|mingw64*|win32|win64|linux|cygwin*)
>  ;;
>  *)
> -disable cuda cuvid nvdec nvenc
> +disable cuda cuvid nvdec nvenc amf
>  ;;
>  esac
> +if test $target_os = "linux"; then
> +disable amf
> +fi
>  else
>  disable cuda cuvid nvdec nvenc

amf here too?

>  fi
> @@ -6179,6 +6188,13 @@ void f(void) { struct { const GUID guid; } s[] = { { 
> NV_ENC_PRESET_HQ_GUID } };
>  int main(void) { return 0; }
>  EOF
>  
> +enabled amf &&
> +check_cc -I$source_path <<EOF || disable amf
> +#include "compat/amd/amfsdkenc.h"
> +AMFFactory *factory;
> +int main(void) { return 0; }
> +EOF
> +
>  # Funny iconv installations are not unusual, so check it after all flags 
> have been set
>  if enabled libc_iconv; then
>  check_func_headers iconv.h iconv
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 2476aec..9bbb60e 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -55,6 +55,7 @@ OBJS = ac3_parser.o 
> \
>  OBJS-$(CONFIG_AANDCTTABLES)+= aandcttab.o
>  OBJS-$(CONFIG_AC3DSP)  += ac3dsp.o ac3.o ac3tab.o
>  OBJS-$(CONFIG_ADTS_HEADER) += adts_header.o mpeg4audio.o
> +OBJS-$(CONFIG_AMF) += amfenc.o
>  OBJS-$(CONFIG_AUDIO_FRAME_QUEUE)   += audio_frame_queue.o
>  OBJS-$(CONFIG_AUDIODSP)+= audiodsp.o
>  OBJS-$(CONFIG_BLOCKDSP)+= blockdsp.o
> @@ -332,6 

Re: [FFmpeg-devel] [PATCH 3/3] tests/fate-run: Use -bitexact

2017-11-26 Thread Michael Niedermayer
On Sun, Oct 22, 2017 at 01:41:58AM +0200, Michael Niedermayer wrote:
> Signed-off-by: Michael Niedermayer 
> ---
>  tests/fate-run.sh | 24 
>  1 file changed, 12 insertions(+), 12 deletions(-)

will apply


[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Frequently ignored answer#1 FFmpeg bugs should be sent to our bugtracker. User
questions about the command line tools should be sent to the ffmpeg-user ML.
And questions about how to use libav* should be sent to the libav-user ML.


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/mlpdsp: Fix signed integer overflow, 2nd try

2017-11-26 Thread Michael Niedermayer
On Mon, Nov 20, 2017 at 09:26:48PM +0100, Michael Niedermayer wrote:
> The outputted bits should match what is used in the lossless check
> 
> Fixes: runtime error: signed integer overflow: -538697856 * 256 cannot be 
> represented in type 'int'
> Fixes: 4326/clusterfuzz-testcase-minimized-5689449645080576
> 
> Found-by: continuous fuzzing process 
> https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
> Signed-off-by: Michael Niedermayer 
> ---
>  libavcodec/mlpdsp.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

applied

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

No human being will ever know the Truth, for even if they happen to say it
by chance, they would not even know they had done so. -- Xenophanes


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec/h264idct_template: Fix integer overflow in ff_h264_idct8_add

2017-11-26 Thread Michael Niedermayer
On Mon, Nov 20, 2017 at 02:58:15PM +0100, Michael Niedermayer wrote:
> Fixes: signed integer overflow: 452986184 - -2113885312 cannot be represented 
> in type 'int'
> Fixes: 4196/clusterfuzz-testcase-minimized-5580648594014208
> 
> Found-by: continuous fuzzing process 
> https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
> Signed-off-by: Michael Niedermayer 
> ---
>  libavcodec/h264idct_template.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)

applied

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Let us carefully observe those good qualities wherein our enemies excel us
and endeavor to excel them, by avoiding what is faulty, and imitating what
is excellent in them. -- Plutarch


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] libavformat/mov: Replace duplicate stream_nb check by assert

2017-11-26 Thread Michael Niedermayer
On Wed, Nov 22, 2017 at 08:19:45PM +0000, Derek Buitenhuis wrote:
> On 11/22/2017 8:09 PM, Michael Niedermayer wrote:
> > not much, no
> > it's a non-static function though
> > I can remove the check completely if that's preferred?
> 
> I guess leave it since it's non-static.
> 
> LGTM.

ok, will apply

thx

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Those who would give up essential Liberty, to purchase a little
temporary Safety, deserve neither Liberty nor Safety -- Benjamin Franklin


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/2] avcodec/kgv1dec: Check that there is enough input for maximum RLE compression

2017-11-26 Thread Michael Niedermayer
On Wed, Nov 22, 2017 at 09:00:57PM +0100, Michael Niedermayer wrote:
> Fixes: Timeout
> Fixes: 4271/clusterfuzz-testcase-4676667768307712
> 
> Found-by: continuous fuzzing process 
> https://github.com/google/oss-fuzz/tree/master/projects/ffmpeg
> Signed-off-by: Michael Niedermayer 
> ---
>  libavcodec/kgv1dec.c | 3 +++
>  1 file changed, 3 insertions(+)

will apply

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Opposition brings concord. Out of discord comes the fairest harmony.
-- Heraclitus


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec: Implement vp8 nvdec hwaccel

2017-11-26 Thread Philip Langdale
On Sun, 26 Nov 2017 22:35:58 +
Mark Thompson  wrote:

> On 26/11/17 22:04, Philip Langdale wrote:
> > Signed-off-by: Philip Langdale 
> > ---
> >  Changelog  |  2 +-
> >  configure  |  2 ++
> >  libavcodec/Makefile|  1 +
> >  libavcodec/hwaccels.h  |  1 +
> >  libavcodec/nvdec.c |  1 +
> >  libavcodec/nvdec_vp8.c | 97
> > ++
> > libavcodec/version.h   |  3 +- libavcodec/vp8.c   |  6 
> >  8 files changed, 111 insertions(+), 2 deletions(-)
> >  create mode 100644 libavcodec/nvdec_vp8.c
> > 
> > diff --git a/Changelog b/Changelog
> > index e3092e211f..4db1d57721 100644
> > --- a/Changelog
> > +++ b/Changelog
> > @@ -13,7 +13,7 @@ version :
> >  - PCE support for extended channel layouts in the AAC encoder
> >  - native aptX encoder and decoder
> >  - Raw aptX muxer and demuxer
> > -- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2/4, VC1 and VP9
> > hwaccel decoding +- NVIDIA NVDEC-accelerated H.264, HEVC,
> > MPEG-1/2/4, VC1, VP8 and VP9 hwaccel decoding
> >  - Intel QSV-accelerated overlay filter
> >  - mcompand audio filter
> >  - acontrast audio filter
> > diff --git a/configure b/configure
> > index bc00b71489..e5fa61e83d 100755
> > --- a/configure
> > +++ b/configure
> > @@ -2748,6 +2748,8 @@ vc1_vaapi_hwaccel_deps="vaapi"
> >  vc1_vaapi_hwaccel_select="vc1_decoder"
> >  vc1_vdpau_hwaccel_deps="vdpau"
> >  vc1_vdpau_hwaccel_select="vc1_decoder"
> > +vp8_nvdec_hwaccel_deps="nvdec"
> > +vp8_nvdec_hwaccel_select="vp8_decoder"
> >  vp8_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVP8"
> >  vp8_vaapi_hwaccel_select="vp8_decoder"
> >  vp9_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_VP9"
> > diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> > index 640edfb590..ca7960cdf4 100644
> > --- a/libavcodec/Makefile
> > +++ b/libavcodec/Makefile
> > @@ -872,6 +872,7 @@ OBJS-$(CONFIG_VC1_NVDEC_HWACCEL)  +=
> > nvdec_vc1.o OBJS-$(CONFIG_VC1_QSV_HWACCEL)+=
> > qsvdec_other.o OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)  +=
> > vaapi_vc1.o OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)  += vdpau_vc1.o
> > +OBJS-$(CONFIG_VP8_NVDEC_HWACCEL)  += nvdec_vp8.o
> >  OBJS-$(CONFIG_VP8_VAAPI_HWACCEL)  += vaapi_vp8.o
> >  OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL)+= dxva2_vp9.o
> >  OBJS-$(CONFIG_VP9_DXVA2_HWACCEL)  += dxva2_vp9.o
> > diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
> > index cefd2b15be..420e2feeea 100644
> > --- a/libavcodec/hwaccels.h
> > +++ b/libavcodec/hwaccels.h
> > @@ -60,6 +60,7 @@ extern const AVHWAccel ff_vc1_dxva2_hwaccel;
> >  extern const AVHWAccel ff_vc1_nvdec_hwaccel;
> >  extern const AVHWAccel ff_vc1_vaapi_hwaccel;
> >  extern const AVHWAccel ff_vc1_vdpau_hwaccel;
> > +extern const AVHWAccel ff_vp8_nvdec_hwaccel;
> >  extern const AVHWAccel ff_vp8_vaapi_hwaccel;
> >  extern const AVHWAccel ff_vp9_d3d11va_hwaccel;
> >  extern const AVHWAccel ff_vp9_d3d11va2_hwaccel;
> > diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
> > index da4451a739..c7a02ff40f 100644
> > --- a/libavcodec/nvdec.c
> > +++ b/libavcodec/nvdec.c
> > @@ -58,6 +58,7 @@ static int map_avcodec_id(enum AVCodecID id)
> >  case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2;
> >  case AV_CODEC_ID_MPEG4:  return cudaVideoCodec_MPEG4;
> >  case AV_CODEC_ID_VC1:return cudaVideoCodec_VC1;
> > +case AV_CODEC_ID_VP8:return cudaVideoCodec_VP8;
> >  case AV_CODEC_ID_VP9:return cudaVideoCodec_VP9;
> >  case AV_CODEC_ID_WMV3:   return cudaVideoCodec_VC1;
> >  }
> > diff --git a/libavcodec/nvdec_vp8.c b/libavcodec/nvdec_vp8.c
> > new file mode 100644
> > index 0000000000..6fc0ac7ded
> > --- /dev/null
> > +++ b/libavcodec/nvdec_vp8.c
> > @@ -0,0 +1,97 @@
> > +/*
> > + * VP8 HW decode acceleration through NVDEC
> > + *
> > + * Copyright (c) 2017 Philip Langdale
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later
> > version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> > 02110-1301 USA
> > + */
> > +
> > +#include "avcodec.h"
> > +#include "nvdec.h"
> > +#include "decode.h"
> > +#include "internal.h"
> > +#include "vp8.h"
> > +
> > +static unsigned char safe_get_ref_idx(VP8Frame *frame)
> > 

Re: [FFmpeg-devel] [PATCH 7/8] lavc/flacenc: add AVX2 version of the 32-bit LPC encoder

2017-11-26 Thread James Almer
On 11/26/2017 8:13 PM, Rostislav Pehlivanov wrote:
> On 26 November 2017 at 22:51, James Darnley  wrote:
> 
>> When compared to the SSE4.2 version, runtime is reduced by 1 to 26%.  The
>> function itself is around 2 times faster.
>> ---
>>  libavcodec/x86/flac_dsp_gpl.asm | 56 ++
>> +--
>>  libavcodec/x86/flacdsp_init.c   |  5 +++-
>>  2 files changed, 47 insertions(+), 14 deletions(-)
>>
>> diff --git a/libavcodec/x86/flac_dsp_gpl.asm
>> b/libavcodec/x86/flac_dsp_gpl.asm
>> index 91989ce560..749e66dec8 100644
>> --- a/libavcodec/x86/flac_dsp_gpl.asm
>> +++ b/libavcodec/x86/flac_dsp_gpl.asm
>> @@ -22,11 +22,11 @@
>>
>>  %include "libavutil/x86/x86util.asm"
>>
>> -SECTION_RODATA
>> +SECTION_RODATA 32
>>
>> -pd_0_int_min: times  2 dd 0, -2147483648
>> -pq_int_min:   times  2 dq -2147483648
>> -pq_int_max:   times  2 dq  2147483647
>> +pd_0_int_min: times  4 dd 0, -2147483648
>> +pq_int_min:   times  4 dq -2147483648
>> +pq_int_max:   times  4 dq  2147483647
>>
>>  SECTION .text
>>
>> @@ -123,7 +123,10 @@ RET
>>  %endmacro
>>
>>  %macro PMINSQ 3
>> -pcmpgtq %3, %2, %1
>> +mova%3, %2
>> +; We cannot use the 3-operand format because the memory location
>> cannot be
>> +; the second operand, only the third.
>> +pcmpgtq %3, %1
>>
> 
> I don't get it, how did it work before then?
> 
> 
>>  pand%1, %3
>>  pandn   %3, %2
>>  por %1, %3
>> @@ -177,11 +180,11 @@ learesq,   [resq+orderq*4]
>>  leasmpq,   [smpq+orderq*4]
>>  leacoefsq, [coefsq+orderq*4]
>>  sublength,  orderd
>> -movd   m3,  r5m
>> +movd   xm3, r5m
>>  negorderq
>>
>>  movu   m4, [pd_0_int_min] ; load 1 bit
>> -psrad  m4,  m3; turn that into shift+1 bits
>> +psrad  m4,  xm3   ; turn that into shift+1 bits
>>  pslld  m4,  1 ; reduce that
>>  mova  [rsp],m4; save sign extend mask
>>
>> @@ -197,8 +200,20 @@ mova  [rsp],m4; save sign extend mask
>>  xor  negj, negj
>>
>>  .looporder1:
>> +%if cpuflag(avx)
>> +vbroadcastss m2, [coefsq+posj*4]
>> +%else
>>  movd   m2,  [coefsq+posj*4] ; c = coefs[j]
>>  SPLATD m2
>> +%endif
>> +%if cpuflag(avx)
>> +vpmuldq  m1, m2, [smpq+negj*4-4]
>> +vpmuldq  m5, m2, [smpq+negj*4-4+mmsize]
>> +vpmuldq  m7, m2, [smpq+negj*4-4+mmsize*2]
>> +vpaddq   m0, m1
>> +vpaddq   m4, m5
>> +vpaddq   m6, m7
>>
> 
> Why force VEX encoding for these instructions, on avx no less?

It's avx2 and using ymm regs, not avx.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 6/8] lavc/x86/flac_dsp_gpl: partially unroll 32-bit LPC encoder

2017-11-26 Thread James Darnley
On 2017-11-27 00:17, Rostislav Pehlivanov wrote:
> On 26 November 2017 at 22:51, James Darnley  wrote:
>> @@ -152,13 +152,13 @@ RET
>>  %macro FUNCTION_BODY_32 0
>>
>>  %if ARCH_X86_64
>> -cglobal flac_enc_lpc_32, 5, 7, 8, mmsize, res, smp, len, order, coefs
>> +cglobal flac_enc_lpc_32, 5, 7, 8, mmsize*4, res, smp, len, order,
>> coefs
>>
> 
> Why x4, shouldn't this be x2?

I write 3 more mm registers to the stack.  The first one is the sign
extension for my hacked qword arithmetic shift added in the first 32-bit
patch.  The new 3 are to store the "odd" values created in the first
inner loop.

I admit that this is a rather ugly construction for a little speed gain
but I think I've seen other ugly things since writing this.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 7/8] lavc/flacenc: add AVX2 version of the 32-bit LPC encoder

2017-11-26 Thread James Almer
On 11/26/2017 7:51 PM, James Darnley wrote:
> When compared to the SSE4.2 version, runtime is reduced by 1 to 26%.  The
> function itself is around 2 times faster.
> ---
>  libavcodec/x86/flac_dsp_gpl.asm | 56 
> +++--
>  libavcodec/x86/flacdsp_init.c   |  5 +++-
>  2 files changed, 47 insertions(+), 14 deletions(-)
> 
> diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
> index 91989ce560..749e66dec8 100644
> --- a/libavcodec/x86/flac_dsp_gpl.asm
> +++ b/libavcodec/x86/flac_dsp_gpl.asm
> @@ -22,11 +22,11 @@
>  
>  %include "libavutil/x86/x86util.asm"
>  
> -SECTION_RODATA
> +SECTION_RODATA 32
>  
> -pd_0_int_min: times  2 dd 0, -2147483648
> -pq_int_min:   times  2 dq -2147483648
> -pq_int_max:   times  2 dq  2147483647
> +pd_0_int_min: times  4 dd 0, -2147483648
> +pq_int_min:   times  4 dq -2147483648
> +pq_int_max:   times  4 dq  2147483647
>  
>  SECTION .text
>  
> @@ -123,7 +123,10 @@ RET
>  %endmacro
>  
>  %macro PMINSQ 3
> -pcmpgtq %3, %2, %1
> +mova%3, %2
> +; We cannot use the 3-operand format because the memory location cannot 
> be
> +; the second operand, only the third.
> +pcmpgtq %3, %1
>  pand%1, %3
>  pandn   %3, %2
>  por %1, %3
> @@ -177,11 +180,11 @@ learesq,   [resq+orderq*4]
>  leasmpq,   [smpq+orderq*4]
>  leacoefsq, [coefsq+orderq*4]
>  sublength,  orderd
> -movd   m3,  r5m
> +movd   xm3, r5m
>  negorderq
>  
>  movu   m4, [pd_0_int_min] ; load 1 bit
> -psrad  m4,  m3; turn that into shift+1 bits
> +psrad  m4,  xm3   ; turn that into shift+1 bits
>  pslld  m4,  1 ; reduce that
>  mova  [rsp],m4; save sign extend mask
>  
> @@ -197,8 +200,20 @@ mova  [rsp],m4; save sign extend mask
>  xor  negj, negj
>  
>  .looporder1:
> +%if cpuflag(avx)

Either avx2, or check instead for mmsize == 32

> +vbroadcastss m2, [coefsq+posj*4]

vpbroadcastd. Or just use the VPBROADCASTD macro to cover both the avx2
and sse4 cases without ifdeffery.

> +%else
>  movd   m2,  [coefsq+posj*4] ; c = coefs[j]
>  SPLATD m2
> +%endif
> +%if cpuflag(avx)
> +vpmuldq  m1, m2, [smpq+negj*4-4]
> +vpmuldq  m5, m2, [smpq+negj*4-4+mmsize]
> +vpmuldq  m7, m2, [smpq+negj*4-4+mmsize*2]
> +vpaddq   m0, m1
> +vpaddq   m4, m5
> +vpaddq   m6, m7
> +%else
>  movu   m1,  [smpq+negj*4-4] ; s = smp[i-j-1]
>  movu   m5,  [smpq+negj*4-4+mmsize]
>  movu   m7,  [smpq+negj*4-4+mmsize*2]
> @@ -212,14 +227,15 @@ mova  [rsp],m4; save sign extend mask
>  paddq  m0,   m1 ; p += c * s
>  paddq  m4,   m5
>  paddq  m6,   m7
> +%endif
>  
>  decnegj
>  incposj
>  jnz .looporder1
>  
> -HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
> -HACK_PSRAQ m4, m3, [rsp], m2
> -HACK_PSRAQ m6, m3, [rsp], m2
> +HACK_PSRAQ m0, xm3, [rsp], m2; p >>= shift
> +HACK_PSRAQ m4, xm3, [rsp], m2
> +HACK_PSRAQ m6, xm3, [rsp], m2
>  CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
>  CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
>  CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
> @@ -241,8 +257,20 @@ mova  [rsp],m4; save sign extend mask
>  xor  negj, negj
>  
>  .looporder2:
> +%if cpuflag(avx)
> +vbroadcastss m2, [coefsq+posj*4]

Same

> +%else
>  movd   m2,  [coefsq+posj*4] ; c = coefs[j]
>  SPLATD m2
> +%endif
> +%if cpuflag(avx)
> +vpmuldq  m1, m2, [smpq+negj*4]
> +vpmuldq  m5, m2, [smpq+negj*4+mmsize]
> +vpmuldq  m7, m2, [smpq+negj*4+mmsize*2]
> +vpaddq   m0, m1
> +vpaddq   m4, m5
> +vpaddq   m6, m7
> +%else
>  movu   m1,  [smpq+negj*4] ; s = smp[i-j-1]
>  movu   m5,  [smpq+negj*4+mmsize]
>  movu   m7,  [smpq+negj*4+mmsize*2]
> @@ -252,14 +280,15 @@ mova  [rsp],m4; save sign extend mask
>  paddq  m0,   m1 ; p += c * s
>  paddq  m4,   m5
>  paddq  m6,   m7
> +%endif
>  
>  decnegj
>  incposj
>  jnz .looporder2
>  
> -HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
> -HACK_PSRAQ m4, m3, [rsp], m2
> -HACK_PSRAQ m6, m3, [rsp], m2
> +HACK_PSRAQ m0, xm3, [rsp], m2; p >>= shift
> +HACK_PSRAQ m4, xm3, [rsp], m2
> +HACK_PSRAQ m6, xm3, [rsp], m2
>  CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
>  CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
>  CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
> @@ -300,3 +329,4 @@ FUNCTION_BODY_32
>  
>  INIT_YMM avx2
>  FUNCTION_BODY_16
> +FUNCTION_BODY_32
> diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
> index f827186c26..fbe70894a0 100644
> --- a/libavcodec/x86/flacdsp_init.c
> +++ b/libavcodec/x86/flacdsp_init.c
> @@ 

Re: [FFmpeg-devel] [PATCH 7/8] lavc/flacenc: add AVX2 version of the 32-bit LPC encoder

2017-11-26 Thread James Darnley
On 2017-11-27 00:13, Rostislav Pehlivanov wrote:
> On 26 November 2017 at 22:51, James Darnley  wrote:
>> @@ -123,7 +123,10 @@ RET
>>  %endmacro
>>
>>  %macro PMINSQ 3
>> -pcmpgtq %3, %2, %1
>> +mova%3, %2
>> +; We cannot use the 3-operand format because the memory location
>> cannot be
>> +; the second operand, only the third.
>> +pcmpgtq %3, %1
>>
> 
> I don't get it, how did it work before then?

Easy.  3-operand instructions were never generated using it, meaning it
was always emulated with a move.

>> @@ -197,8 +200,20 @@ mova  [rsp],m4; save sign extend mask
>>  xor  negj, negj
>>
>>  .looporder1:
>> +%if cpuflag(avx)
>> +vbroadcastss m2, [coefsq+posj*4]
>> +%else
>>  movd   m2,  [coefsq+posj*4] ; c = coefs[j]
>>  SPLATD m2
>> +%endif
>> +%if cpuflag(avx)
>> +vpmuldq  m1, m2, [smpq+negj*4-4]
>> +vpmuldq  m5, m2, [smpq+negj*4-4+mmsize]
>> +vpmuldq  m7, m2, [smpq+negj*4-4+mmsize*2]
>> +vpaddq   m0, m1
>> +vpaddq   m4, m5
>> +vpaddq   m6, m7
>>
> 
> Why force VEX encoding for these instructions, on avx no less?

Not sure.  Legacy code written before I knew what I was doing?  Perhaps
some issue arose with the assembler or x86inc at that time and this is
how I worked around it.

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 3/8] avcodec/flac: add SSE4.2 version of the 32-bit lpc encoder

2017-11-26 Thread James Almer
On 11/26/2017 8:07 PM, Carl Eugen Hoyos wrote:
> 2017-11-26 23:51 GMT+01:00 James Darnley :
> 
>> +if (EXTERNAL_SSE42(cpu_flags)) {
>> +if (CONFIG_GPL)
>> +c->lpc32_encode = ff_flac_enc_lpc_32_sse42;
>> +}
> 
> Any objections over "if (CONFIG_GPL && EXTERNAL_..)"?
> 
> Carl Eugen

I prefer it as is. It's not only similar to other checks around it, but
also if someone decides to write an lgpl sse4.2 function they will not
have to change the existing statement or add a duplicate one.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 4/8] avcodec/flac: partially unroll loop in flac_enc_lpc_32

2017-11-26 Thread Rostislav Pehlivanov
On 26 November 2017 at 22:51, James Darnley  wrote:

> Now does 6 samples per iteration, up from 2.
>
> From 1.6 to 2.1 times faster again.  2.5 to 3.9 times faster overall.
> Runtime is reduced by a further 4 to 17%.  Reduced by 9 to 65% overall.
>
> Same conditions as previously.
> ---
>  libavcodec/x86/flac_dsp_gpl.asm | 30 +-
>  1 file changed, 25 insertions(+), 5 deletions(-)
>
> diff --git a/libavcodec/x86/flac_dsp_gpl.asm
> b/libavcodec/x86/flac_dsp_gpl.asm
> index 618306eb5f..4d212ed212 100644
> --- a/libavcodec/x86/flac_dsp_gpl.asm
> +++ b/libavcodec/x86/flac_dsp_gpl.asm
> @@ -152,13 +152,13 @@ RET
>  %macro FUNCTION_BODY_32 0
>
>  %if ARCH_X86_64
> -cglobal flac_enc_lpc_32, 5, 7, 4, mmsize, res, smp, len, order, coefs
> +cglobal flac_enc_lpc_32, 5, 7, 8, mmsize, res, smp, len, order, coefs
>  DECLARE_REG_TMP 5, 6
>  %define length r2d
>
>  movsxd orderq, orderd
>  %else
> -cglobal flac_enc_lpc_32, 5, 6, 4, mmsize, res, smp, len, order, coefs
> +cglobal flac_enc_lpc_32, 5, 6, 8, mmsize, res, smp, len, order, coefs
>  DECLARE_REG_TMP 2, 5
>  %define length r2mp
>  %endif
> @@ -190,6 +190,8 @@ mova  [rsp],m4; save sign extend mask
>
>  .looplen:
>  pxor m0,   m0
> +pxor m4,   m4
> +pxor m6,   m6
>  mov  posj, orderq
>  xor  negj, negj
>
> @@ -197,23 +199,41 @@ mova  [rsp],m4; save sign extend mask
>  movd   m2,  [coefsq+posj*4] ; c = coefs[j]
>  SPLATD m2
>  pmovzxdq m1,  [smpq+negj*4-4] ; s = smp[i-j-1]
> +pmovzxdq m5,  [smpq+negj*4-4+mmsize/2]
> +pmovzxdq m7,  [smpq+negj*4-4+mmsize]
>  pmuldq m1,   m2
> +pmuldq m5,   m2
> +pmuldq m7,   m2
>  paddq  m0,   m1 ; p += c * s
> +paddq  m4,   m5
> +paddq  m6,   m7
>
>  decnegj
>  incposj
>  jnz .looporder
>
>  HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
> +HACK_PSRAQ m4, m3, [rsp], m2
> +HACK_PSRAQ m6, m3, [rsp], m2
>  CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
> +CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
> +CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
>  pshufd  m0,m0, q0020 ; pack into first 2 dwords
> +pshufd  m4,m4, q0020
> +pshufd  m6,m6, q0020
>  movhm1,   [smpq]
> +movhm5,   [smpq+mmsize/2]
> +movhm7,   [smpq+mmsize]
>  psubd   m1,m0   ; smp[i] - p
> +psubd   m5,m4
> +psubd   m7,m6
>  movh   [resq], m1   ; res[i] = smp[i] - (p >> shift)
> +movh   [resq+mmsize/2], m5
> +movh   [resq+mmsize], m7
>
> -add resq,   mmsize/2
> -add smpq,   mmsize/2
> -sub length, mmsize/8
> +add resq,   (3*mmsize)/2
> +add smpq,   (3*mmsize)/2
> +sub length, (3*mmsize)/8
>  jg .looplen
>  RET
>
> --
> 2.15.0
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

lgtm, tnx
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 2/8] avcodec/flac: add AVX2 version of the 16-bit LPC encoder

2017-11-26 Thread Rostislav Pehlivanov
On 26 November 2017 at 22:51, James Darnley  wrote:

> When compared to the SSE4 version, runtime is reduced by 0.5 to 20%.
> After a bug fix long, long ago in e609cfd697 the 16-bit lpc encoder is
> used so little that the runtime reduction is no longer correct.  The
> function itself is around 2 times faster.  (As one might expect for
> doing twice as many samples every iteration.)
> ---
>  libavcodec/flacenc.c|  2 +-
>  libavcodec/x86/flac_dsp_gpl.asm | 32 +++-
>  libavcodec/x86/flacdsp_init.c   |  5 +
>  3 files changed, 33 insertions(+), 6 deletions(-)
>
> diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
> index 170c3caf48..cf25982c91 100644
> --- a/libavcodec/flacenc.c
> +++ b/libavcodec/flacenc.c
> @@ -88,7 +88,7 @@ typedef struct FlacSubframe {
>  uint64_t rc_sums[32][MAX_PARTITIONS];
>
>  int32_t samples[FLAC_MAX_BLOCKSIZE];
> -int32_t residual[FLAC_MAX_BLOCKSIZE+11];
> +int32_t residual[FLAC_MAX_BLOCKSIZE+23];
>  } FlacSubframe;
>
>  typedef struct FlacFrame {
> diff --git a/libavcodec/x86/flac_dsp_gpl.asm
> b/libavcodec/x86/flac_dsp_gpl.asm
> index e285158185..c461c666be 100644
> --- a/libavcodec/x86/flac_dsp_gpl.asm
> +++ b/libavcodec/x86/flac_dsp_gpl.asm
> @@ -24,7 +24,8 @@
>
>  SECTION .text
>
> -INIT_XMM sse4
> +%macro FUNCTION_BODY_16 0
> +
>  %if ARCH_X86_64
>  cglobal flac_enc_lpc_16, 5, 7, 8, 0, res, smp, len, order, coefs
>  DECLARE_REG_TMP 5, 6
> @@ -51,7 +52,7 @@ lea  resq,   [resq+orderq*4]
>  lea  smpq,   [smpq+orderq*4]
>  lea  coefsq, [coefsq+orderq*4]
>  sub  length,  orderd
> -movd m3,  r5m
> +movd xm3, r5m
>  neg  orderq
>
>  %define posj t0q
> @@ -65,8 +66,20 @@ neg  orderq
>  xor  negj, negj
>
>  .looporder:
> +%if cpuflag(avx)
> +vbroadcastss m2, [coefsq+posj*4]
> +%else
>  movd   m2, [coefsq+posj*4] ; c = coefs[j]
>  SPLATD m2
> +%endif
> +%if cpuflag(avx)
> +vpmulld m1, m2, [smpq+negj*4-4]
> +vpmulld m5, m2, [smpq+negj*4-4+mmsize]
> +vpmulld m7, m2, [smpq+negj*4-4+mmsize*2]
> +vpaddd  m0, m1
> +vpaddd  m4, m5
> +vpaddd  m6, m7
>

Same as the 32bit lpc avx2 patch


> +%else
>  movu   m1, [smpq+negj*4-4] ; s = smp[i-j-1]
>  movu   m5, [smpq+negj*4-4+mmsize]
>  movu   m7, [smpq+negj*4-4+mmsize*2]
> @@ -76,14 +89,15 @@ neg  orderq
>  paddd  m0,  m1 ; p += c * s
>  paddd  m4,  m5
>  paddd  m6,  m7
> +%endif
>
>  decnegj
>  incposj
>  jnz .looporder
>
> -psrad  m0, m3  ; p >>= shift
> -psrad  m4, m3
> -psrad  m6, m3
> +psrad  m0, xm3  ; p >>= shift
> +psrad  m4, xm3
> +psrad  m6, xm3
>  movu   m1,[smpq]
>  movu   m5,[smpq+mmsize]
>  movu   m7,[smpq+mmsize*2]
> @@ -99,3 +113,11 @@ neg  orderq
>  sub length, (3*mmsize)/4
>  jg .looplen
>  RET
> +
> +%endmacro
> +
> +INIT_XMM sse4
> +FUNCTION_BODY_16
> +
> +INIT_YMM avx2
> +FUNCTION_BODY_16
> diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
> index 1971f81b8d..0a5c01859f 100644
> --- a/libavcodec/x86/flacdsp_init.c
> +++ b/libavcodec/x86/flacdsp_init.c
> @@ -28,6 +28,7 @@ void ff_flac_lpc_32_xop(int32_t *samples, const int
> coeffs[32], int order,
>  int qlevel, int len);
>
>  void ff_flac_enc_lpc_16_sse4(int32_t *, const int32_t *, int, int, const
> int32_t *,int);
> +void ff_flac_enc_lpc_16_avx2(int32_t *, const int32_t *, int, int, const
> int32_t *,int);
>
>  #define DECORRELATE_FUNCS(fmt, opt)
> \
>  void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in,
> int channels, \
> @@ -110,6 +111,10 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c,
> enum AVSampleFormat fmt, int
>  if (CONFIG_GPL)
>  c->lpc16_encode = ff_flac_enc_lpc_16_sse4;
>  }
> +if (EXTERNAL_AVX2(cpu_flags)) {
> +if (CONFIG_GPL)
>

yeah, just combine them, if someone wants to add non-gpl asm this is the
least of their problems


> +c->lpc16_encode = ff_flac_enc_lpc_16_avx2;
> +}
>  #endif
>  #endif /* HAVE_X86ASM */
>  }
> --
> 2.15.0
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/8] avcodec/flac: document limitations of the LPC encoder

2017-11-26 Thread Rostislav Pehlivanov
On 26 November 2017 at 22:51, James Darnley  wrote:

> State that the maximum value of order is 32.  This limit is used in both
> C and x86 assembly code.
> ---
>  libavcodec/flacdsp.h | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
> index 7bb0dd0e9a..90fd3f04b5 100644
> --- a/libavcodec/flacdsp.h
> +++ b/libavcodec/flacdsp.h
> @@ -30,6 +30,14 @@ typedef struct FLACDSPContext {
>int qlevel, int len);
>  void (*lpc32)(int32_t *samples, const int coeffs[32], int order,
>int qlevel, int len);
> +
> +/**
> + * These encoder functions support a maximum order of 32.
> + *
> + * This limit is used:
> + * - when CONFIG_SMALL is 0 to unroll a loop in the C template.
> + * - when SSE4 (or newer) is available on x86 to unroll a copy loop.
> + */
>  void (*lpc16_encode)(int32_t *res, const int32_t *smp, int len, int
> order,
>   const int32_t coefs[32], int shift);
>  void (*lpc32_encode)(int32_t *res, const int32_t *smp, int len, int
> order,
> --
> 2.15.0
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

lgtm, should have just pushed
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 6/8] lavc/x86/flac_dsp_gpl: partially unroll 32-bit LPC encoder

2017-11-26 Thread Rostislav Pehlivanov
On 26 November 2017 at 22:51, James Darnley  wrote:

> Around 1.1 times faster and reduces runtime by up to 6%.
> ---
>  libavcodec/x86/flac_dsp_gpl.asm | 91 ++
> ++-
>  1 file changed, 72 insertions(+), 19 deletions(-)
>
> diff --git a/libavcodec/x86/flac_dsp_gpl.asm
> b/libavcodec/x86/flac_dsp_gpl.asm
> index 952fc8b86b..91989ce560 100644
> --- a/libavcodec/x86/flac_dsp_gpl.asm
> +++ b/libavcodec/x86/flac_dsp_gpl.asm
> @@ -152,13 +152,13 @@ RET
>  %macro FUNCTION_BODY_32 0
>
>  %if ARCH_X86_64
> -cglobal flac_enc_lpc_32, 5, 7, 8, mmsize, res, smp, len, order, coefs
> +cglobal flac_enc_lpc_32, 5, 7, 8, mmsize*4, res, smp, len, order,
> coefs
>

Why x4, shouldn't this be x2?


>  DECLARE_REG_TMP 5, 6
>  %define length r2d
>
>  movsxd orderq, orderd
>  %else
> -cglobal flac_enc_lpc_32, 5, 6, 8, mmsize, res, smp, len, order, coefs
> +cglobal flac_enc_lpc_32, 5, 6, 8, mmsize*4, res, smp, len, order,
> coefs
>  DECLARE_REG_TMP 2, 5
>  %define length r2mp
>  %endif
> @@ -189,18 +189,23 @@ mova  [rsp],m4; save sign extend mask
>  %define negj t1q
>
>  .looplen:
> +; process "odd" samples
>  pxor m0,   m0
>  pxor m4,   m4
>  pxor m6,   m6
>  mov  posj, orderq
>  xor  negj, negj
>
> -.looporder:
> +.looporder1:
>  movd   m2,  [coefsq+posj*4] ; c = coefs[j]
>  SPLATD m2
> -pmovzxdq m1,  [smpq+negj*4-4] ; s = smp[i-j-1]
> -pmovzxdq m5,  [smpq+negj*4-4+mmsize/2]
> -pmovzxdq m7,  [smpq+negj*4-4+mmsize]
> +movu   m1,  [smpq+negj*4-4] ; s = smp[i-j-1]
> +movu   m5,  [smpq+negj*4-4+mmsize]
> +movu   m7,  [smpq+negj*4-4+mmsize*2]
> +; Rather than explicitly unpack adjacent samples into qwords we
> can let
> +; the pmuldq instruction unpack the 0th and 2nd samples for us
> when it
> +; does its multiply.  This saves an unpack for every sample in
> the inner
> +; loop meaning it should be (much) quicker.
>  pmuldq m1,   m2
>  pmuldq m5,   m2
>  pmuldq m7,   m2
> @@ -210,7 +215,7 @@ mova  [rsp],m4; save sign extend mask
>
>  decnegj
>  incposj
> -jnz .looporder
> +jnz .looporder1
>
>  HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
>  HACK_PSRAQ m4, m3, [rsp], m2
> @@ -218,22 +223,70 @@ mova  [rsp],m4; save sign extend mask
>  CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
>  CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
>  CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
> -pshufd  m0,m0, q0020 ; pack into first 2 dwords
> -pshufd  m4,m4, q0020
> -pshufd  m6,m6, q0020
> -movhm1,   [smpq]
> -movhm5,   [smpq+mmsize/2]
> -movhm7,   [smpq+mmsize]
> +movum1,   [smpq]
> +movum5,   [smpq+mmsize]
> +movum7,   [smpq+mmsize*2]
>  psubd   m1,m0   ; smp[i] - p
>  psubd   m5,m4
>  psubd   m7,m6
> -movh   [resq], m1   ; res[i] = smp[i] - (p >> shift)
> -movh   [resq+mmsize/2], m5
> -movh   [resq+mmsize], m7
> +mova   [rsp+mmsize], m1   ; res[i] = smp[i] - (p >> shift)
> +mova   [rsp+mmsize*2], m5
> +mova   [rsp+mmsize*3], m7
> +
> +; process "even" samples
> +pxor m0,   m0
> +pxor m4,   m4
> +pxor m6,   m6
> +mov  posj, orderq
> +xor  negj, negj
> +
> +.looporder2:
> +movd   m2,  [coefsq+posj*4] ; c = coefs[j]
> +SPLATD m2
> +movu   m1,  [smpq+negj*4] ; s = smp[i-j-1]
> +movu   m5,  [smpq+negj*4+mmsize]
> +movu   m7,  [smpq+negj*4+mmsize*2]
> +pmuldq m1,   m2
> +pmuldq m5,   m2
> +pmuldq m7,   m2
> +paddq  m0,   m1 ; p += c * s
> +paddq  m4,   m5
> +paddq  m6,   m7
> +
> +decnegj
> +incposj
> +jnz .looporder2
> +
> +HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
> +HACK_PSRAQ m4, m3, [rsp], m2
> +HACK_PSRAQ m6, m3, [rsp], m2
> +CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
> +CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
> +CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
> +movum1,   [smpq+4]
> +movum5,   [smpq+4+mmsize]
> +movum7,   [smpq+4+mmsize*2]
> +psubd   m1,m0   ; smp[i] - p
> +psubd   m5,m4
> +psubd   m7,m6
> +
> +; interleave odd and even samples
> +pslldq  m1, 4
> +pslldq  m5, 4
> +pslldq  m7, 4
> +
> +pblendw m1, [rsp+mmsize], q0303
> +pblendw m5, [rsp+mmsize*2], q0303
> +pblendw m7, [rsp+mmsize*3], q0303
> +
> +movu [resq], m1
> +movu [resq+mmsize], m5
> +movu [resq+mmsize*2], m7
> +
> +add resq,3*mmsize
> +add smpq,3*mmsize
> +sub length, (3*mmsize)/4
>
> -add resq,   (3*mmsize)/2
> -add smpq,   

Re: [FFmpeg-devel] [PATCH] tests/checkasm/float_dsp: Increase allowed difference for float_dsp.vector_dmul

2017-11-26 Thread James Almer
On 11/26/2017 8:09 PM, Michael Niedermayer wrote:
> On Sun, Nov 26, 2017 at 12:47:22AM +0100, Michael Niedermayer wrote:
>> On Sun, Nov 26, 2017 at 12:10:38AM +0100, Michael Niedermayer wrote:
>>> On Fri, Nov 24, 2017 at 11:37:36PM -0300, James Almer wrote:
 On 10/29/2017 11:57 AM, Michael Niedermayer wrote:
> The chosen value is the lowest power of 2 that allows 1000 iterations of 
> fate-checkasm-float_dsp
> to pass on x86-32

 Ticket #6848 reports this value is still not enough. Maybe something
 like 1.0e-12 or 1.0e-13 instead?
>>>
>>> ok, ill push it with 1e-12
>>
>> Or do people prefer this: (this should be more correct)
>>
>> commit 67ba87a320faba623c0b35a0692adb916860ac40 (HEAD -> master)
>> Author: Michael Niedermayer 
>> Date:   Sun Oct 29 15:26:50 2017 +0100
>>
>> tests/checkasm/float_dsp: Increase allowed difference for 
>> float_dsp.vector_dmul
>>
>> Tested for 1 iterations on x86-32
>>
>> Fixes: Ticket6848
>>
>> Signed-off-by: Michael Niedermayer 
> 
> I'll push a variant of this so that gets fixed. We can change it
> later if people prefer something else
> leaving it open is bad ...

Any solution is fine with me. Thanks for fixing it.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 7/8] lavc/flacenc: add AVX2 version of the 32-bit LPC encoder

2017-11-26 Thread Rostislav Pehlivanov
On 26 November 2017 at 22:51, James Darnley  wrote:

> When compared to the SSE4.2 version, runtime is reduced by 1 to 26%.  The
> function itself is around 2 times faster.
> ---
>  libavcodec/x86/flac_dsp_gpl.asm | 56 ++
> +--
>  libavcodec/x86/flacdsp_init.c   |  5 +++-
>  2 files changed, 47 insertions(+), 14 deletions(-)
>
> diff --git a/libavcodec/x86/flac_dsp_gpl.asm
> b/libavcodec/x86/flac_dsp_gpl.asm
> index 91989ce560..749e66dec8 100644
> --- a/libavcodec/x86/flac_dsp_gpl.asm
> +++ b/libavcodec/x86/flac_dsp_gpl.asm
> @@ -22,11 +22,11 @@
>
>  %include "libavutil/x86/x86util.asm"
>
> -SECTION_RODATA
> +SECTION_RODATA 32
>
> -pd_0_int_min: times  2 dd 0, -2147483648
> -pq_int_min:   times  2 dq -2147483648
> -pq_int_max:   times  2 dq  2147483647
> +pd_0_int_min: times  4 dd 0, -2147483648
> +pq_int_min:   times  4 dq -2147483648
> +pq_int_max:   times  4 dq  2147483647
>
>  SECTION .text
>
> @@ -123,7 +123,10 @@ RET
>  %endmacro
>
>  %macro PMINSQ 3
> -pcmpgtq %3, %2, %1
> +mova%3, %2
> +; We cannot use the 3-operand format because the memory location
> cannot be
> +; the second operand, only the third.
> +pcmpgtq %3, %1
>

I don't get it, how did it work before then?


>  pand%1, %3
>  pandn   %3, %2
>  por %1, %3
> @@ -177,11 +180,11 @@ learesq,   [resq+orderq*4]
>  leasmpq,   [smpq+orderq*4]
>  leacoefsq, [coefsq+orderq*4]
>  sublength,  orderd
> -movd   m3,  r5m
> +movd   xm3, r5m
>  negorderq
>
>  movu   m4, [pd_0_int_min] ; load 1 bit
> -psrad  m4,  m3; turn that into shift+1 bits
> +psrad  m4,  xm3   ; turn that into shift+1 bits
>  pslld  m4,  1 ; reduce that
>  mova  [rsp],m4; save sign extend mask
>
> @@ -197,8 +200,20 @@ mova  [rsp],m4; save sign extend mask
>  xor  negj, negj
>
>  .looporder1:
> +%if cpuflag(avx)
> +vbroadcastss m2, [coefsq+posj*4]
> +%else
>  movd   m2,  [coefsq+posj*4] ; c = coefs[j]
>  SPLATD m2
> +%endif
> +%if cpuflag(avx)
> +vpmuldq  m1, m2, [smpq+negj*4-4]
> +vpmuldq  m5, m2, [smpq+negj*4-4+mmsize]
> +vpmuldq  m7, m2, [smpq+negj*4-4+mmsize*2]
> +vpaddq   m0, m1
> +vpaddq   m4, m5
> +vpaddq   m6, m7
>

Why force VEX encoding for these instructions, on avx no less?


> +%else
>  movu   m1,  [smpq+negj*4-4] ; s = smp[i-j-1]
>  movu   m5,  [smpq+negj*4-4+mmsize]
>  movu   m7,  [smpq+negj*4-4+mmsize*2]
> @@ -212,14 +227,15 @@ mova  [rsp],m4; save sign extend mask
>  paddq  m0,   m1 ; p += c * s
>  paddq  m4,   m5
>  paddq  m6,   m7
> +%endif
>
>  decnegj
>  incposj
>  jnz .looporder1
>
> -HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
> -HACK_PSRAQ m4, m3, [rsp], m2
> -HACK_PSRAQ m6, m3, [rsp], m2
> +HACK_PSRAQ m0, xm3, [rsp], m2; p >>= shift
> +HACK_PSRAQ m4, xm3, [rsp], m2
> +HACK_PSRAQ m6, xm3, [rsp], m2
>  CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
>  CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
>  CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
> @@ -241,8 +257,20 @@ mova  [rsp],m4; save sign extend mask
>  xor  negj, negj
>
>  .looporder2:
> +%if cpuflag(avx)
> +vbroadcastss m2, [coefsq+posj*4]
> +%else
>  movd   m2,  [coefsq+posj*4] ; c = coefs[j]
>  SPLATD m2
> +%endif
> +%if cpuflag(avx)
> +vpmuldq  m1, m2, [smpq+negj*4]
> +vpmuldq  m5, m2, [smpq+negj*4+mmsize]
> +vpmuldq  m7, m2, [smpq+negj*4+mmsize*2]
> +vpaddq   m0, m1
> +vpaddq   m4, m5
> +vpaddq   m6, m7
> +%else
>  movu   m1,  [smpq+negj*4] ; s = smp[i-j-1]
>  movu   m5,  [smpq+negj*4+mmsize]
>  movu   m7,  [smpq+negj*4+mmsize*2]
> @@ -252,14 +280,15 @@ mova  [rsp],m4; save sign extend mask
>  paddq  m0,   m1 ; p += c * s
>  paddq  m4,   m5
>  paddq  m6,   m7
> +%endif
>
>  decnegj
>  incposj
>  jnz .looporder2
>
> -HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
> -HACK_PSRAQ m4, m3, [rsp], m2
> -HACK_PSRAQ m6, m3, [rsp], m2
> +HACK_PSRAQ m0, xm3, [rsp], m2; p >>= shift
> +HACK_PSRAQ m4, xm3, [rsp], m2
> +HACK_PSRAQ m6, xm3, [rsp], m2
>  CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
>  CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
>  CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
> @@ -300,3 +329,4 @@ FUNCTION_BODY_32
>
>  INIT_YMM avx2
>  FUNCTION_BODY_16
> +FUNCTION_BODY_32
> diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
> index f827186c26..fbe70894a0 100644
> --- a/libavcodec/x86/flacdsp_init.c
> +++ b/libavcodec/x86/flacdsp_init.c
> @@ -30,6 +30,7 @@ void 

Re: [FFmpeg-devel] [PATCH] tests/checkasm/float_dsp: Increase allowed difference for float_dsp.vector_dmul

2017-11-26 Thread Michael Niedermayer
On Sun, Nov 26, 2017 at 12:47:22AM +0100, Michael Niedermayer wrote:
> On Sun, Nov 26, 2017 at 12:10:38AM +0100, Michael Niedermayer wrote:
> > On Fri, Nov 24, 2017 at 11:37:36PM -0300, James Almer wrote:
> > > On 10/29/2017 11:57 AM, Michael Niedermayer wrote:
> > > > The chosen value is the lowest power of 2 that allows 1000 iterations 
> > > > of fate-checkasm-float_dsp
> > > > to pass on x86-32
> > > 
> > > Ticket #6848 reports this value is still not enough. Maybe something
> > > like 1.0e-12 or 1.0e-13 instead?
> > 
> > ok, ill push it with 1e-12
> 
> Or do people prefer this: (this should be more correct)
> 
> commit 67ba87a320faba623c0b35a0692adb916860ac40 (HEAD -> master)
> Author: Michael Niedermayer 
> Date:   Sun Oct 29 15:26:50 2017 +0100
> 
> tests/checkasm/float_dsp: Increase allowed difference for 
> float_dsp.vector_dmul
> 
> Tested for 1 iterations on x86-32
> 
> Fixes: Ticket6848
> 
> Signed-off-by: Michael Niedermayer 

I'll push a variant of this so that gets fixed. We can change it
later if people prefer something else
leaving it open is bad ...

[...]

-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If you drop bombs on a foreign country and kill a hundred thousand
innocent people, expect your government to call the consequence
"unprovoked inhuman terrorist attacks" and use it to justify dropping
more bombs and killing more people. The technology changed, the idea is old.


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 3/8] avcodec/flac: add SSE4.2 version of the 32-bit lpc encoder

2017-11-26 Thread Carl Eugen Hoyos
2017-11-26 23:51 GMT+01:00 James Darnley :

> +if (EXTERNAL_SSE42(cpu_flags)) {
> +if (CONFIG_GPL)
> +c->lpc32_encode = ff_flac_enc_lpc_32_sse42;
> +}

Any objections over "if (CONFIG_GPL && EXTERNAL_..)"?

Carl Eugen
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 5/8] lavc/x86/flac_dsp_gpl: cosmetic whitespace alignment

2017-11-26 Thread Rostislav Pehlivanov
On 26 November 2017 at 22:51, James Darnley  wrote:

> ---
>  libavcodec/x86/flac_dsp_gpl.asm | 40 --
> --
>  1 file changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/libavcodec/x86/flac_dsp_gpl.asm
> b/libavcodec/x86/flac_dsp_gpl.asm
> index 4d212ed212..952fc8b86b 100644
> --- a/libavcodec/x86/flac_dsp_gpl.asm
> +++ b/libavcodec/x86/flac_dsp_gpl.asm
> @@ -75,42 +75,42 @@ neg  orderq
>  %if cpuflag(avx)
>  vbroadcastss m2, [coefsq+posj*4]
>  %else
> -movd   m2, [coefsq+posj*4] ; c = coefs[j]
> -SPLATD m2
> +movd m2, [coefsq+posj*4] ; c = coefs[j]
> +SPLATD   m2
>  %endif
>  %if cpuflag(avx)
> -vpmulld m1, m2, [smpq+negj*4-4]
> -vpmulld m5, m2, [smpq+negj*4-4+mmsize]
> -vpmulld m7, m2, [smpq+negj*4-4+mmsize*2]
> -vpaddd  m0, m1
> -vpaddd  m4, m5
> -vpaddd  m6, m7
> +vpmulld  m1,  m2, [smpq+negj*4-4]
> +vpmulld  m5,  m2, [smpq+negj*4-4+mmsize]
> +vpmulld  m7,  m2, [smpq+negj*4-4+mmsize*2]
> +vpaddd   m0,  m1
> +vpaddd   m4,  m5
> +vpaddd   m6,  m7
>  %else
> -movu   m1, [smpq+negj*4-4] ; s = smp[i-j-1]
> -movu   m5, [smpq+negj*4-4+mmsize]
> -movu   m7, [smpq+negj*4-4+mmsize*2]
> -pmulld m1,  m2
> -pmulld m5,  m2
> -pmulld m7,  m2
> -paddd  m0,  m1 ; p += c * s
> -paddd  m4,  m5
> -paddd  m6,  m7
> +movu m1, [smpq+negj*4-4] ; s = smp[i-j-1]
> +movu m5, [smpq+negj*4-4+mmsize]
> +movu m7, [smpq+negj*4-4+mmsize*2]
> +pmulld   m1,  m2
> +pmulld   m5,  m2
> +pmulld   m7,  m2
> +padddm0,  m1 ; p += c * s
> +padddm4,  m5
> +padddm6,  m7
>  %endif
>
>  decnegj
>  incposj
>  jnz .looporder
>
> -psrad  m0, xm3  ; p >>= shift
> +psrad  m0, xm3   ; p >>= shift
>  psrad  m4, xm3
>  psrad  m6, xm3
>  movu   m1,[smpq]
>  movu   m5,[smpq+mmsize]
>  movu   m7,[smpq+mmsize*2]
> -psubd  m1, m0  ; smp[i] - p
> +psubd  m1, m0; smp[i] - p
>  psubd  m5, m4
>  psubd  m7, m6
> -movu  [resq],  m1  ; res[i] = smp[i] - (p >> shift)
> +movu  [resq],  m1; res[i] = smp[i] - (p >> shift)
>  movu  [resq+mmsize], m5
>  movu  [resq+mmsize*2], m7
>
> --
> 2.15.0
>
> ___
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

lgtm, should have just pushed this
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avformat/matroskaenc: actually enforce the stream limit

2017-11-26 Thread Michael Niedermayer
On Sun, Nov 26, 2017 at 02:03:09PM -0300, James Almer wrote:
> Prevents out-of-array accesses. Addresses ticket #6873
> 
> Signed-off-by: James Almer 
> ---
>  libavformat/matroskaenc.c | 7 +++
>  1 file changed, 7 insertions(+)

LGTM

thx

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

It is dangerous to be right in matters on which the established authorities
are wrong. -- Voltaire


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 7/8] lavc/flacenc: add AVX2 version of the 32-bit LPC encoder

2017-11-26 Thread James Darnley
When compared to the SSE4.2 version, runtime is reduced by 1 to 26%.  The
function itself is around 2 times faster.
---
 libavcodec/x86/flac_dsp_gpl.asm | 56 +++--
 libavcodec/x86/flacdsp_init.c   |  5 +++-
 2 files changed, 47 insertions(+), 14 deletions(-)

diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
index 91989ce560..749e66dec8 100644
--- a/libavcodec/x86/flac_dsp_gpl.asm
+++ b/libavcodec/x86/flac_dsp_gpl.asm
@@ -22,11 +22,11 @@
 
 %include "libavutil/x86/x86util.asm"
 
-SECTION_RODATA
+SECTION_RODATA 32
 
-pd_0_int_min: times  2 dd 0, -2147483648
-pq_int_min:   times  2 dq -2147483648
-pq_int_max:   times  2 dq  2147483647
+pd_0_int_min: times  4 dd 0, -2147483648
+pq_int_min:   times  4 dq -2147483648
+pq_int_max:   times  4 dq  2147483647
 
 SECTION .text
 
@@ -123,7 +123,10 @@ RET
 %endmacro
 
 %macro PMINSQ 3
-pcmpgtq %3, %2, %1
+mova%3, %2
+; We cannot use the 3-operand format because the memory location cannot be
+; the second operand, only the third.
+pcmpgtq %3, %1
 pand%1, %3
 pandn   %3, %2
 por %1, %3
@@ -177,11 +180,11 @@ learesq,   [resq+orderq*4]
 leasmpq,   [smpq+orderq*4]
 leacoefsq, [coefsq+orderq*4]
 sublength,  orderd
-movd   m3,  r5m
+movd   xm3, r5m
 negorderq
 
 movu   m4, [pd_0_int_min] ; load 1 bit
-psrad  m4,  m3; turn that into shift+1 bits
+psrad  m4,  xm3   ; turn that into shift+1 bits
 pslld  m4,  1 ; reduce that
 mova  [rsp],m4; save sign extend mask
 
@@ -197,8 +200,20 @@ mova  [rsp],m4; save sign extend mask
 xor  negj, negj
 
 .looporder1:
+%if cpuflag(avx)
+vbroadcastss m2, [coefsq+posj*4]
+%else
 movd   m2,  [coefsq+posj*4] ; c = coefs[j]
 SPLATD m2
+%endif
+%if cpuflag(avx)
+vpmuldq  m1, m2, [smpq+negj*4-4]
+vpmuldq  m5, m2, [smpq+negj*4-4+mmsize]
+vpmuldq  m7, m2, [smpq+negj*4-4+mmsize*2]
+vpaddq   m0, m1
+vpaddq   m4, m5
+vpaddq   m6, m7
+%else
 movu   m1,  [smpq+negj*4-4] ; s = smp[i-j-1]
 movu   m5,  [smpq+negj*4-4+mmsize]
 movu   m7,  [smpq+negj*4-4+mmsize*2]
@@ -212,14 +227,15 @@ mova  [rsp],m4; save sign extend mask
 paddq  m0,   m1 ; p += c * s
 paddq  m4,   m5
 paddq  m6,   m7
+%endif
 
 decnegj
 incposj
 jnz .looporder1
 
-HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
-HACK_PSRAQ m4, m3, [rsp], m2
-HACK_PSRAQ m6, m3, [rsp], m2
+HACK_PSRAQ m0, xm3, [rsp], m2; p >>= shift
+HACK_PSRAQ m4, xm3, [rsp], m2
+HACK_PSRAQ m6, xm3, [rsp], m2
 CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
 CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
 CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
@@ -241,8 +257,20 @@ mova  [rsp],m4; save sign extend mask
 xor  negj, negj
 
 .looporder2:
+%if cpuflag(avx)
+vbroadcastss m2, [coefsq+posj*4]
+%else
 movd   m2,  [coefsq+posj*4] ; c = coefs[j]
 SPLATD m2
+%endif
+%if cpuflag(avx)
+vpmuldq  m1, m2, [smpq+negj*4]
+vpmuldq  m5, m2, [smpq+negj*4+mmsize]
+vpmuldq  m7, m2, [smpq+negj*4+mmsize*2]
+vpaddq   m0, m1
+vpaddq   m4, m5
+vpaddq   m6, m7
+%else
 movu   m1,  [smpq+negj*4] ; s = smp[i-j-1]
 movu   m5,  [smpq+negj*4+mmsize]
 movu   m7,  [smpq+negj*4+mmsize*2]
@@ -252,14 +280,15 @@ mova  [rsp],m4; save sign extend mask
 paddq  m0,   m1 ; p += c * s
 paddq  m4,   m5
 paddq  m6,   m7
+%endif
 
 decnegj
 incposj
 jnz .looporder2
 
-HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
-HACK_PSRAQ m4, m3, [rsp], m2
-HACK_PSRAQ m6, m3, [rsp], m2
+HACK_PSRAQ m0, xm3, [rsp], m2; p >>= shift
+HACK_PSRAQ m4, xm3, [rsp], m2
+HACK_PSRAQ m6, xm3, [rsp], m2
 CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
 CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
 CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
@@ -300,3 +329,4 @@ FUNCTION_BODY_32
 
 INIT_YMM avx2
 FUNCTION_BODY_16
+FUNCTION_BODY_32
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index f827186c26..fbe70894a0 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -30,6 +30,7 @@ void ff_flac_lpc_32_xop(int32_t *samples, const int 
coeffs[32], int order,
 void ff_flac_enc_lpc_16_sse4(int32_t *, const int32_t *, int, int, const 
int32_t *,int);
 void ff_flac_enc_lpc_16_avx2(int32_t *, const int32_t *, int, int, const 
int32_t *,int);
 void ff_flac_enc_lpc_32_sse42(int32_t *, const int32_t *, int, int, const 
int32_t *,int);
+void ff_flac_enc_lpc_32_avx2(int32_t *, const int32_t *, int, int, const 
int32_t *,int);
 
 #define DECORRELATE_FUNCS(fmt, opt)  

[FFmpeg-devel] [PATCH 6/8] lavc/x86/flac_dsp_gpl: partially unroll 32-bit LPC encoder

2017-11-26 Thread James Darnley
Around 1.1 times faster and reduces runtime by up to 6%.
---
 libavcodec/x86/flac_dsp_gpl.asm | 91 -
 1 file changed, 72 insertions(+), 19 deletions(-)

diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
index 952fc8b86b..91989ce560 100644
--- a/libavcodec/x86/flac_dsp_gpl.asm
+++ b/libavcodec/x86/flac_dsp_gpl.asm
@@ -152,13 +152,13 @@ RET
 %macro FUNCTION_BODY_32 0
 
 %if ARCH_X86_64
-cglobal flac_enc_lpc_32, 5, 7, 8, mmsize, res, smp, len, order, coefs
+cglobal flac_enc_lpc_32, 5, 7, 8, mmsize*4, res, smp, len, order, coefs
 DECLARE_REG_TMP 5, 6
 %define length r2d
 
 movsxd orderq, orderd
 %else
-cglobal flac_enc_lpc_32, 5, 6, 8, mmsize, res, smp, len, order, coefs
+cglobal flac_enc_lpc_32, 5, 6, 8, mmsize*4, res, smp, len, order, coefs
 DECLARE_REG_TMP 2, 5
 %define length r2mp
 %endif
@@ -189,18 +189,23 @@ mova  [rsp],m4; save sign extend mask
 %define negj t1q
 
 .looplen:
+; process "odd" samples
 pxor m0,   m0
 pxor m4,   m4
 pxor m6,   m6
 mov  posj, orderq
 xor  negj, negj
 
-.looporder:
+.looporder1:
 movd   m2,  [coefsq+posj*4] ; c = coefs[j]
 SPLATD m2
-pmovzxdq m1,  [smpq+negj*4-4] ; s = smp[i-j-1]
-pmovzxdq m5,  [smpq+negj*4-4+mmsize/2]
-pmovzxdq m7,  [smpq+negj*4-4+mmsize]
+movu   m1,  [smpq+negj*4-4] ; s = smp[i-j-1]
+movu   m5,  [smpq+negj*4-4+mmsize]
+movu   m7,  [smpq+negj*4-4+mmsize*2]
+; Rather than explicitly unpack adjacent samples into qwords we can let
+; the pmuldq instruction unpack the 0th and 2nd samples for us when it
+; does its multiply.  This saves an unpack for every sample in the 
inner
+; loop meaning it should be (much) quicker.
 pmuldq m1,   m2
 pmuldq m5,   m2
 pmuldq m7,   m2
@@ -210,7 +215,7 @@ mova  [rsp],m4; save sign extend mask
 
 decnegj
 incposj
-jnz .looporder
+jnz .looporder1
 
 HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
 HACK_PSRAQ m4, m3, [rsp], m2
@@ -218,22 +223,70 @@ mova  [rsp],m4; save sign extend mask
 CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
 CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
 CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
-pshufd  m0,m0, q0020 ; pack into first 2 dwords
-pshufd  m4,m4, q0020
-pshufd  m6,m6, q0020
-movhm1,   [smpq]
-movhm5,   [smpq+mmsize/2]
-movhm7,   [smpq+mmsize]
+movum1,   [smpq]
+movum5,   [smpq+mmsize]
+movum7,   [smpq+mmsize*2]
 psubd   m1,m0   ; smp[i] - p
 psubd   m5,m4
 psubd   m7,m6
-movh   [resq], m1   ; res[i] = smp[i] - (p >> shift)
-movh   [resq+mmsize/2], m5
-movh   [resq+mmsize], m7
+mova   [rsp+mmsize], m1   ; res[i] = smp[i] - (p >> shift)
+mova   [rsp+mmsize*2], m5
+mova   [rsp+mmsize*3], m7
+
+; process "even" samples
+pxor m0,   m0
+pxor m4,   m4
+pxor m6,   m6
+mov  posj, orderq
+xor  negj, negj
+
+.looporder2:
+movd   m2,  [coefsq+posj*4] ; c = coefs[j]
+SPLATD m2
+movu   m1,  [smpq+negj*4] ; s = smp[i-j-1]
+movu   m5,  [smpq+negj*4+mmsize]
+movu   m7,  [smpq+negj*4+mmsize*2]
+pmuldq m1,   m2
+pmuldq m5,   m2
+pmuldq m7,   m2
+paddq  m0,   m1 ; p += c * s
+paddq  m4,   m5
+paddq  m6,   m7
+
+decnegj
+incposj
+jnz .looporder2
+
+HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
+HACK_PSRAQ m4, m3, [rsp], m2
+HACK_PSRAQ m6, m3, [rsp], m2
+CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
+CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
+CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
+movum1,   [smpq+4]
+movum5,   [smpq+4+mmsize]
+movum7,   [smpq+4+mmsize*2]
+psubd   m1,m0   ; smp[i] - p
+psubd   m5,m4
+psubd   m7,m6
+
+; interleave odd and even samples
+pslldq  m1, 4
+pslldq  m5, 4
+pslldq  m7, 4
+
+pblendw m1, [rsp+mmsize], q0303
+pblendw m5, [rsp+mmsize*2], q0303
+pblendw m7, [rsp+mmsize*3], q0303
+
+movu [resq], m1
+movu [resq+mmsize], m5
+movu [resq+mmsize*2], m7
+
+add resq,3*mmsize
+add smpq,3*mmsize
+sub length, (3*mmsize)/4
 
-add resq,   (3*mmsize)/2
-add smpq,   (3*mmsize)/2
-sub length, (3*mmsize)/8
 jg .looplen
 RET
 
-- 
2.15.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 3/8] avcodec/flac: add SSE4.2 version of the 32-bit lpc encoder

2017-11-26 Thread James Darnley
From 1.3 to 2.5 times faster.  Runtime reduced by 4 to 58%.  As with the
16-bit version the speed-up generally increases with compression_level.

Also like the 16-bit version, it is not used with levels less than 3.

After the bug fix made long, long ago in e609cfd697, this 32-bit LPC
encoder is heavily used with 16-bit samples.
---
 libavcodec/x86/flac_dsp_gpl.asm | 106 
 libavcodec/x86/flacdsp_init.c   |   5 ++
 2 files changed, 111 insertions(+)

diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
index c461c666be..618306eb5f 100644
--- a/libavcodec/x86/flac_dsp_gpl.asm
+++ b/libavcodec/x86/flac_dsp_gpl.asm
@@ -22,6 +22,12 @@
 
 %include "libavutil/x86/x86util.asm"
 
+SECTION_RODATA
+
+pd_0_int_min: times  2 dd 0, -2147483648
+pq_int_min:   times  2 dq -2147483648
+pq_int_max:   times  2 dq  2147483647
+
 SECTION .text
 
 %macro FUNCTION_BODY_16 0
@@ -116,8 +122,108 @@ RET
 
 %endmacro
 
+%macro PMINSQ 3
+pcmpgtq %3, %2, %1
+pand%1, %3
+pandn   %3, %2
+por %1, %3
+%endmacro
+
+%macro PMAXSQ 3
+pcmpgtq %3, %1, %2
+pand%1, %3
+pandn   %3, %2
+por %1, %3
+%endmacro
+
+%macro CLIPQ 4 ; reg, min, max, tmp
+PMAXSQ %1, %2, %4
+PMINSQ %1, %3, %4
+%endmacro
+
+%macro HACK_PSRAQ 4 ; dst, src (shift), sign extend mask, tmp
+pxor%4, %4 ; zero
+pcmpgtq %4, %1 ; mask where 0 > dst
+pand%4, %3 ; mask & sign extend mask
+psrlq   %1, %2 ; dst >>= shift
+por %1, %4 ; dst | mask
+%endmacro
+
+%macro FUNCTION_BODY_32 0
+
+%if ARCH_X86_64
+cglobal flac_enc_lpc_32, 5, 7, 4, mmsize, res, smp, len, order, coefs
+DECLARE_REG_TMP 5, 6
+%define length r2d
+
+movsxd orderq, orderd
+%else
+cglobal flac_enc_lpc_32, 5, 6, 4, mmsize, res, smp, len, order, coefs
+DECLARE_REG_TMP 2, 5
+%define length r2mp
+%endif
+
+; Here we assume that the maximum order value is 32.  This means that we only
+; need to copy a maximum of 32 samples.  Therefore we let the preprocessor
+; unroll this loop and copy all 32.
+%assign iter 0
+%rep 32/(mmsize/4)
+movu  m0, [smpq+iter]
+movu [resq+iter],  m0
+%assign iter iter+mmsize
+%endrep
+
+learesq,   [resq+orderq*4]
+leasmpq,   [smpq+orderq*4]
+leacoefsq, [coefsq+orderq*4]
+sublength,  orderd
+movd   m3,  r5m
+negorderq
+
+movu   m4, [pd_0_int_min] ; load 1 bit
+psrad  m4,  m3; turn that into shift+1 bits
+pslld  m4,  1 ; reduce that
+mova  [rsp],m4; save sign extend mask
+
+%define posj t0q
+%define negj t1q
+
+.looplen:
+pxor m0,   m0
+mov  posj, orderq
+xor  negj, negj
+
+.looporder:
+movd   m2,  [coefsq+posj*4] ; c = coefs[j]
+SPLATD m2
+pmovzxdq m1,  [smpq+negj*4-4] ; s = smp[i-j-1]
+pmuldq m1,   m2
+paddq  m0,   m1 ; p += c * s
+
+decnegj
+incposj
+jnz .looporder
+
+HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
+CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
+pshufd  m0,m0, q0020 ; pack into first 2 dwords
+movhm1,   [smpq]
+psubd   m1,m0   ; smp[i] - p
+movh   [resq], m1   ; res[i] = smp[i] - (p >> shift)
+
+add resq,   mmsize/2
+add smpq,   mmsize/2
+sub length, mmsize/8
+jg .looplen
+RET
+
+%endmacro ; FUNCTION_BODY_32
+
 INIT_XMM sse4
 FUNCTION_BODY_16
 
+INIT_XMM sse42
+FUNCTION_BODY_32
+
 INIT_YMM avx2
 FUNCTION_BODY_16
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index 0a5c01859f..f827186c26 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -29,6 +29,7 @@ void ff_flac_lpc_32_xop(int32_t *samples, const int 
coeffs[32], int order,
 
 void ff_flac_enc_lpc_16_sse4(int32_t *, const int32_t *, int, int, const 
int32_t *,int);
 void ff_flac_enc_lpc_16_avx2(int32_t *, const int32_t *, int, int, const 
int32_t *,int);
+void ff_flac_enc_lpc_32_sse42(int32_t *, const int32_t *, int, int, const 
int32_t *,int);
 
 #define DECORRELATE_FUNCS(fmt, opt)
  \
 void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int 
channels, \
@@ -111,6 +112,10 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum 
AVSampleFormat fmt, int
 if (CONFIG_GPL)
 c->lpc16_encode = ff_flac_enc_lpc_16_sse4;
 }
+if (EXTERNAL_SSE42(cpu_flags)) {
+if (CONFIG_GPL)
+c->lpc32_encode = ff_flac_enc_lpc_32_sse42;
+}
 if (EXTERNAL_AVX2(cpu_flags)) {
 if (CONFIG_GPL)
 c->lpc16_encode = ff_flac_enc_lpc_16_avx2;
-- 
2.15.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/8] avcodec/flac: document limitations of the LPC encoder

2017-11-26 Thread James Darnley
State that the maximum value of order is 32.  This limit is used in both
C and x86 assembly code.
---
 libavcodec/flacdsp.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/libavcodec/flacdsp.h b/libavcodec/flacdsp.h
index 7bb0dd0e9a..90fd3f04b5 100644
--- a/libavcodec/flacdsp.h
+++ b/libavcodec/flacdsp.h
@@ -30,6 +30,14 @@ typedef struct FLACDSPContext {
   int qlevel, int len);
 void (*lpc32)(int32_t *samples, const int coeffs[32], int order,
   int qlevel, int len);
+
+/**
+ * These encoder functions support a maximum order of 32.
+ *
+ * This limit is used:
+ * - when CONFIG_SMALL is 0 to unroll a loop in the C template.
+ * - when SSE4 (or newer) is available on x86 to unroll a copy loop.
+ */
 void (*lpc16_encode)(int32_t *res, const int32_t *smp, int len, int order,
  const int32_t coefs[32], int shift);
 void (*lpc32_encode)(int32_t *res, const int32_t *smp, int len, int order,
-- 
2.15.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 8/8] checkasm: add tests for flacenc lpc coder

2017-11-26 Thread James Darnley
---
 tests/checkasm/flacdsp.c | 72 
 1 file changed, 72 insertions(+)

diff --git a/tests/checkasm/flacdsp.c b/tests/checkasm/flacdsp.c
index dccb54d672..08e5e264ea 100644
--- a/tests/checkasm/flacdsp.c
+++ b/tests/checkasm/flacdsp.c
@@ -20,13 +20,16 @@
 
 #include <string.h>
 #include "checkasm.h"
+#include "libavcodec/flac.h"
 #include "libavcodec/flacdsp.h"
 #include "libavutil/common.h"
 #include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
+#include "libavcodec/mathops.h"
 
 #define BUF_SIZE 256
 #define MAX_CHANNELS 8
+#define BLOCKSIZE 4608
 
 #define randomize_buffers() \
 do {\
@@ -53,6 +56,23 @@ static void check_decorrelate(uint8_t **ref_dst, uint8_t 
**ref_src, uint8_t **ne
 bench_new(new_dst, (int32_t **)new_src, channels, BUF_SIZE / 
sizeof(int32_t), 8);
 }
 
+static void randomize_coefs(int32_t coef[32], int bits)
+{
+int i;
+for (i = 0; i < 32; i++)
+coef[i] = sign_extend(rnd(), bits);
+}
+
+static void randomize_audio(int32_t *a, int32_t *b, int bits)
+{
+int i;
+for (i = 0; i < BLOCKSIZE; i++) {
+int32_t value = sign_extend(rnd(), bits);
+a[i] = value;
+b[i] = value;
+}
+}
+
 void checkasm_check_flacdsp(void)
 {
 LOCAL_ALIGNED_16(uint8_t, ref_dst, [BUF_SIZE*MAX_CHANNELS]);
@@ -87,4 +107,56 @@ void checkasm_check_flacdsp(void)
 }
 
 report("decorrelate");
+
+if (check_func(h.lpc16_encode, "flacdsp.lpc16_encode")) {
+int32_t samples_ref[BLOCKSIZE];
+int32_t samples_new[BLOCKSIZE];
+int32_t residual_ref[BLOCKSIZE+23];
+int32_t residual_new[BLOCKSIZE+23];
+int32_t coefs[32];
+declare_func(void, int32_t *res, const int32_t *smp, int len, int 
order,
+ const int32_t coefs[32], int shift);
+int order;
+
+randomize_audio(samples_ref, samples_new, 16);
+randomize_coefs(coefs, 16);
+for (order = 1; order < 32; order++) {
+int shift = rnd() & 15;
+call_ref(residual_ref, samples_ref, BLOCKSIZE, order, coefs, 
shift);
+call_new(residual_new, samples_new, BLOCKSIZE, order, coefs, 
shift);
+if (memcmp(samples_ref, samples_new, sizeof samples_ref)
+|| memcmp(residual_ref, residual_new, BLOCKSIZE * 
sizeof(int32_t))) {
+fprintf(stderr, "failed at order= %d\n", order);
+fail();
+}
+bench_new(residual_new, samples_new, BLOCKSIZE, order, coefs, 
shift);
+}
+}
+report("flacdsp.lpc16_encode");
+
+if (check_func(h.lpc32_encode, "flacdsp.lpc32_encode")) {
+int32_t samples_ref[BLOCKSIZE];
+int32_t samples_new[BLOCKSIZE];
+int32_t residual_ref[BLOCKSIZE+23];
+int32_t residual_new[BLOCKSIZE+23];
+int32_t coefs[32];
+declare_func(void, int32_t *res, const int32_t *smp, int len, int 
order,
+ const int32_t coefs[32], int shift);
+int order;
+
+randomize_audio(samples_ref, samples_new, 24);
+randomize_coefs(coefs, 24);
+for (order = 1; order < 32; order++) {
+int shift = rnd() & 15;
+call_ref(residual_ref, samples_ref, BLOCKSIZE, order, coefs, 
shift);
+call_new(residual_new, samples_new, BLOCKSIZE, order, coefs, 
shift);
+if (memcmp(samples_ref, samples_new, sizeof samples_ref)
+|| memcmp(residual_ref, residual_new, BLOCKSIZE * 
sizeof(int32_t))) {
+fprintf(stderr, "failed at order= %d\n", order);
+fail();
+}
+bench_new(residual_new, samples_new, BLOCKSIZE, order, coefs, 
shift);
+}
+}
+report("flacdsp.lpc32_encode");
 }
-- 
2.15.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 4/8] avcodec/flac: partially unroll loop in flac_enc_lpc_32

2017-11-26 Thread James Darnley
Now does 6 samples per iteration, up from 2.

From 1.6 to 2.1 times faster again.  2.5 to 3.9 times faster overall.
Runtime is reduced by a further 4 to 17%.  Reduced by 9 to 65% overall.

Same conditions as previously.
---
 libavcodec/x86/flac_dsp_gpl.asm | 30 +-
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
index 618306eb5f..4d212ed212 100644
--- a/libavcodec/x86/flac_dsp_gpl.asm
+++ b/libavcodec/x86/flac_dsp_gpl.asm
@@ -152,13 +152,13 @@ RET
 %macro FUNCTION_BODY_32 0
 
 %if ARCH_X86_64
-cglobal flac_enc_lpc_32, 5, 7, 4, mmsize, res, smp, len, order, coefs
+cglobal flac_enc_lpc_32, 5, 7, 8, mmsize, res, smp, len, order, coefs
 DECLARE_REG_TMP 5, 6
 %define length r2d
 
 movsxd orderq, orderd
 %else
-cglobal flac_enc_lpc_32, 5, 6, 4, mmsize, res, smp, len, order, coefs
+cglobal flac_enc_lpc_32, 5, 6, 8, mmsize, res, smp, len, order, coefs
 DECLARE_REG_TMP 2, 5
 %define length r2mp
 %endif
@@ -190,6 +190,8 @@ mova  [rsp],m4; save sign extend mask
 
 .looplen:
 pxor m0,   m0
+pxor m4,   m4
+pxor m6,   m6
 mov  posj, orderq
 xor  negj, negj
 
@@ -197,23 +199,41 @@ mova  [rsp],m4; save sign extend mask
 movd   m2,  [coefsq+posj*4] ; c = coefs[j]
 SPLATD m2
 pmovzxdq m1,  [smpq+negj*4-4] ; s = smp[i-j-1]
+pmovzxdq m5,  [smpq+negj*4-4+mmsize/2]
+pmovzxdq m7,  [smpq+negj*4-4+mmsize]
 pmuldq m1,   m2
+pmuldq m5,   m2
+pmuldq m7,   m2
 paddq  m0,   m1 ; p += c * s
+paddq  m4,   m5
+paddq  m6,   m7
 
 decnegj
 incposj
 jnz .looporder
 
 HACK_PSRAQ m0, m3, [rsp], m2; p >>= shift
+HACK_PSRAQ m4, m3, [rsp], m2
+HACK_PSRAQ m6, m3, [rsp], m2
 CLIPQ   m0,   [pq_int_min], [pq_int_max], m2 ; clip(p >> shift)
+CLIPQ   m4,   [pq_int_min], [pq_int_max], m2
+CLIPQ   m6,   [pq_int_min], [pq_int_max], m2
 pshufd  m0,m0, q0020 ; pack into first 2 dwords
+pshufd  m4,m4, q0020
+pshufd  m6,m6, q0020
 movhm1,   [smpq]
+movhm5,   [smpq+mmsize/2]
+movhm7,   [smpq+mmsize]
 psubd   m1,m0   ; smp[i] - p
+psubd   m5,m4
+psubd   m7,m6
 movh   [resq], m1   ; res[i] = smp[i] - (p >> shift)
+movh   [resq+mmsize/2], m5
+movh   [resq+mmsize], m7
 
-add resq,   mmsize/2
-add smpq,   mmsize/2
-sub length, mmsize/8
+add resq,   (3*mmsize)/2
+add smpq,   (3*mmsize)/2
+sub length, (3*mmsize)/8
 jg .looplen
 RET
 
-- 
2.15.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 0/8] left-overs of an ancient patch set for the flac encoder

2017-11-26 Thread James Darnley
Three years ago I was writing some assembly to speed up the FLAC encoder.  I got
part of the set committed at that time.  Since then the encoder had a small
overhaul and a major bugfix.  That all meant this set needed a little work to
bring it back on top of master.  I did most of that work in August and finished
it today.

Some of you have been bugging me off and on about finishing it so here it is.
Enjoy, review, critique, whatever.  When people have signed off on it I will
push the set, after addressing issues people have with it.

That bugfix I mentioned was e609cfd697.  It made the benchmarking I originally
did a little less useful because both types of the lpc coder are used for both
sample depths (16 and 24).  That does make the 32-bit version more useful though
because it gets used with 16-bit samples when the intermediates overflow 32
bits.

James Darnley (8):
  avcodec/flac: document limitations of the LPC encoder
  avcodec/flac: add AVX2 version of the 16-bit LPC encoder
  avcodec/flac: add SSE4.2 version of the 32-bit lpc encoder
  avcodec/flac: partially unroll loop in flac_enc_lpc_32
  lavc/x86/flac_dsp_gpl: cosmetic whitespace alignment
  lavc/x86/flac_dsp_gpl: partially unroll 32-bit LPC encoder
  lavc/flacenc: add AVX2 version of the 32-bit LPC encoder
  checkasm: add tests for flacenc lpc coder

 libavcodec/flacdsp.h|   8 ++
 libavcodec/flacenc.c|   2 +-
 libavcodec/x86/flac_dsp_gpl.asm | 267 +---
 libavcodec/x86/flacdsp_init.c   |  13 ++
 tests/checkasm/flacdsp.c|  72 +++
 5 files changed, 343 insertions(+), 19 deletions(-)

-- 
2.15.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/8] avcodec/flac: add AVX2 version of the 16-bit LPC encoder

2017-11-26 Thread James Darnley
When compared to the SSE4 version, runtime is reduced by 0.5 to 20%.
After a bug fix long, long ago in e609cfd697, the 16-bit LPC encoder is
used so little that the runtime reduction is no longer correct.  The
function itself is around 2 times faster.  (As one might expect for
doing twice as many samples every iteration.)
---
 libavcodec/flacenc.c|  2 +-
 libavcodec/x86/flac_dsp_gpl.asm | 32 +++-
 libavcodec/x86/flacdsp_init.c   |  5 +
 3 files changed, 33 insertions(+), 6 deletions(-)

diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index 170c3caf48..cf25982c91 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -88,7 +88,7 @@ typedef struct FlacSubframe {
 uint64_t rc_sums[32][MAX_PARTITIONS];
 
 int32_t samples[FLAC_MAX_BLOCKSIZE];
-int32_t residual[FLAC_MAX_BLOCKSIZE+11];
+int32_t residual[FLAC_MAX_BLOCKSIZE+23];
 } FlacSubframe;
 
 typedef struct FlacFrame {
diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
index e285158185..c461c666be 100644
--- a/libavcodec/x86/flac_dsp_gpl.asm
+++ b/libavcodec/x86/flac_dsp_gpl.asm
@@ -24,7 +24,8 @@
 
 SECTION .text
 
-INIT_XMM sse4
+%macro FUNCTION_BODY_16 0
+
 %if ARCH_X86_64
 cglobal flac_enc_lpc_16, 5, 7, 8, 0, res, smp, len, order, coefs
 DECLARE_REG_TMP 5, 6
@@ -51,7 +52,7 @@ lea  resq,   [resq+orderq*4]
 lea  smpq,   [smpq+orderq*4]
 lea  coefsq, [coefsq+orderq*4]
 sub  length,  orderd
-movd m3,  r5m
+movd xm3, r5m
 neg  orderq
 
 %define posj t0q
@@ -65,8 +66,20 @@ neg  orderq
 xor  negj, negj
 
 .looporder:
+%if cpuflag(avx)
+vbroadcastss m2, [coefsq+posj*4]
+%else
 movd   m2, [coefsq+posj*4] ; c = coefs[j]
 SPLATD m2
+%endif
+%if cpuflag(avx)
+vpmulld m1, m2, [smpq+negj*4-4]
+vpmulld m5, m2, [smpq+negj*4-4+mmsize]
+vpmulld m7, m2, [smpq+negj*4-4+mmsize*2]
+vpaddd  m0, m1
+vpaddd  m4, m5
+vpaddd  m6, m7
+%else
 movu   m1, [smpq+negj*4-4] ; s = smp[i-j-1]
 movu   m5, [smpq+negj*4-4+mmsize]
 movu   m7, [smpq+negj*4-4+mmsize*2]
@@ -76,14 +89,15 @@ neg  orderq
 paddd  m0,  m1 ; p += c * s
 paddd  m4,  m5
 paddd  m6,  m7
+%endif
 
 decnegj
 incposj
 jnz .looporder
 
-psrad  m0, m3  ; p >>= shift
-psrad  m4, m3
-psrad  m6, m3
+psrad  m0, xm3  ; p >>= shift
+psrad  m4, xm3
+psrad  m6, xm3
 movu   m1,[smpq]
 movu   m5,[smpq+mmsize]
 movu   m7,[smpq+mmsize*2]
@@ -99,3 +113,11 @@ neg  orderq
 sub length, (3*mmsize)/4
 jg .looplen
 RET
+
+%endmacro
+
+INIT_XMM sse4
+FUNCTION_BODY_16
+
+INIT_YMM avx2
+FUNCTION_BODY_16
diff --git a/libavcodec/x86/flacdsp_init.c b/libavcodec/x86/flacdsp_init.c
index 1971f81b8d..0a5c01859f 100644
--- a/libavcodec/x86/flacdsp_init.c
+++ b/libavcodec/x86/flacdsp_init.c
@@ -28,6 +28,7 @@ void ff_flac_lpc_32_xop(int32_t *samples, const int 
coeffs[32], int order,
 int qlevel, int len);
 
 void ff_flac_enc_lpc_16_sse4(int32_t *, const int32_t *, int, int, const 
int32_t *,int);
+void ff_flac_enc_lpc_16_avx2(int32_t *, const int32_t *, int, int, const 
int32_t *,int);
 
 #define DECORRELATE_FUNCS(fmt, opt)
  \
 void ff_flac_decorrelate_ls_##fmt##_##opt(uint8_t **out, int32_t **in, int 
channels, \
@@ -110,6 +111,10 @@ av_cold void ff_flacdsp_init_x86(FLACDSPContext *c, enum 
AVSampleFormat fmt, int
 if (CONFIG_GPL)
 c->lpc16_encode = ff_flac_enc_lpc_16_sse4;
 }
+if (EXTERNAL_AVX2(cpu_flags)) {
+if (CONFIG_GPL)
+c->lpc16_encode = ff_flac_enc_lpc_16_avx2;
+}
 #endif
 #endif /* HAVE_X86ASM */
 }
-- 
2.15.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 5/8] lavc/x86/flac_dsp_gpl: cosmetic whitespace alignment

2017-11-26 Thread James Darnley
---
 libavcodec/x86/flac_dsp_gpl.asm | 40 
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/libavcodec/x86/flac_dsp_gpl.asm b/libavcodec/x86/flac_dsp_gpl.asm
index 4d212ed212..952fc8b86b 100644
--- a/libavcodec/x86/flac_dsp_gpl.asm
+++ b/libavcodec/x86/flac_dsp_gpl.asm
@@ -75,42 +75,42 @@ neg  orderq
 %if cpuflag(avx)
 vbroadcastss m2, [coefsq+posj*4]
 %else
-movd   m2, [coefsq+posj*4] ; c = coefs[j]
-SPLATD m2
+movd m2, [coefsq+posj*4] ; c = coefs[j]
+SPLATD   m2
 %endif
 %if cpuflag(avx)
-vpmulld m1, m2, [smpq+negj*4-4]
-vpmulld m5, m2, [smpq+negj*4-4+mmsize]
-vpmulld m7, m2, [smpq+negj*4-4+mmsize*2]
-vpaddd  m0, m1
-vpaddd  m4, m5
-vpaddd  m6, m7
+vpmulld  m1,  m2, [smpq+negj*4-4]
+vpmulld  m5,  m2, [smpq+negj*4-4+mmsize]
+vpmulld  m7,  m2, [smpq+negj*4-4+mmsize*2]
+vpaddd   m0,  m1
+vpaddd   m4,  m5
+vpaddd   m6,  m7
 %else
-movu   m1, [smpq+negj*4-4] ; s = smp[i-j-1]
-movu   m5, [smpq+negj*4-4+mmsize]
-movu   m7, [smpq+negj*4-4+mmsize*2]
-pmulld m1,  m2
-pmulld m5,  m2
-pmulld m7,  m2
-paddd  m0,  m1 ; p += c * s
-paddd  m4,  m5
-paddd  m6,  m7
+movu m1, [smpq+negj*4-4] ; s = smp[i-j-1]
+movu m5, [smpq+negj*4-4+mmsize]
+movu m7, [smpq+negj*4-4+mmsize*2]
+pmulld   m1,  m2
+pmulld   m5,  m2
+pmulld   m7,  m2
+padddm0,  m1 ; p += c * s
+padddm4,  m5
+padddm6,  m7
 %endif
 
 decnegj
 incposj
 jnz .looporder
 
-psrad  m0, xm3  ; p >>= shift
+psrad  m0, xm3   ; p >>= shift
 psrad  m4, xm3
 psrad  m6, xm3
 movu   m1,[smpq]
 movu   m5,[smpq+mmsize]
 movu   m7,[smpq+mmsize*2]
-psubd  m1, m0  ; smp[i] - p
+psubd  m1, m0; smp[i] - p
 psubd  m5, m4
 psubd  m7, m6
-movu  [resq],  m1  ; res[i] = smp[i] - (p >> shift)
+movu  [resq],  m1; res[i] = smp[i] - (p >> shift)
 movu  [resq+mmsize], m5
 movu  [resq+mmsize*2], m7
 
-- 
2.15.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avcodec: Implement vp8 nvdec hwaccel

2017-11-26 Thread Mark Thompson
On 26/11/17 22:04, Philip Langdale wrote:
> Signed-off-by: Philip Langdale 
> ---
>  Changelog  |  2 +-
>  configure  |  2 ++
>  libavcodec/Makefile|  1 +
>  libavcodec/hwaccels.h  |  1 +
>  libavcodec/nvdec.c |  1 +
>  libavcodec/nvdec_vp8.c | 97 
> ++
>  libavcodec/version.h   |  3 +-
>  libavcodec/vp8.c   |  6 
>  8 files changed, 111 insertions(+), 2 deletions(-)
>  create mode 100644 libavcodec/nvdec_vp8.c
> 
> diff --git a/Changelog b/Changelog
> index e3092e211f..4db1d57721 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -13,7 +13,7 @@ version :
>  - PCE support for extended channel layouts in the AAC encoder
>  - native aptX encoder and decoder
>  - Raw aptX muxer and demuxer
> -- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2/4, VC1 and VP9 hwaccel 
> decoding
> +- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2/4, VC1, VP8 and VP9 hwaccel 
> decoding
>  - Intel QSV-accelerated overlay filter
>  - mcompand audio filter
>  - acontrast audio filter
> diff --git a/configure b/configure
> index bc00b71489..e5fa61e83d 100755
> --- a/configure
> +++ b/configure
> @@ -2748,6 +2748,8 @@ vc1_vaapi_hwaccel_deps="vaapi"
>  vc1_vaapi_hwaccel_select="vc1_decoder"
>  vc1_vdpau_hwaccel_deps="vdpau"
>  vc1_vdpau_hwaccel_select="vc1_decoder"
> +vp8_nvdec_hwaccel_deps="nvdec"
> +vp8_nvdec_hwaccel_select="vp8_decoder"
>  vp8_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVP8"
>  vp8_vaapi_hwaccel_select="vp8_decoder"
>  vp9_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_VP9"
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 640edfb590..ca7960cdf4 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -872,6 +872,7 @@ OBJS-$(CONFIG_VC1_NVDEC_HWACCEL)  += nvdec_vc1.o
>  OBJS-$(CONFIG_VC1_QSV_HWACCEL)+= qsvdec_other.o
>  OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)  += vaapi_vc1.o
>  OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)  += vdpau_vc1.o
> +OBJS-$(CONFIG_VP8_NVDEC_HWACCEL)  += nvdec_vp8.o
>  OBJS-$(CONFIG_VP8_VAAPI_HWACCEL)  += vaapi_vp8.o
>  OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL)+= dxva2_vp9.o
>  OBJS-$(CONFIG_VP9_DXVA2_HWACCEL)  += dxva2_vp9.o
> diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
> index cefd2b15be..420e2feeea 100644
> --- a/libavcodec/hwaccels.h
> +++ b/libavcodec/hwaccels.h
> @@ -60,6 +60,7 @@ extern const AVHWAccel ff_vc1_dxva2_hwaccel;
>  extern const AVHWAccel ff_vc1_nvdec_hwaccel;
>  extern const AVHWAccel ff_vc1_vaapi_hwaccel;
>  extern const AVHWAccel ff_vc1_vdpau_hwaccel;
> +extern const AVHWAccel ff_vp8_nvdec_hwaccel;
>  extern const AVHWAccel ff_vp8_vaapi_hwaccel;
>  extern const AVHWAccel ff_vp9_d3d11va_hwaccel;
>  extern const AVHWAccel ff_vp9_d3d11va2_hwaccel;
> diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
> index da4451a739..c7a02ff40f 100644
> --- a/libavcodec/nvdec.c
> +++ b/libavcodec/nvdec.c
> @@ -58,6 +58,7 @@ static int map_avcodec_id(enum AVCodecID id)
>  case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2;
>  case AV_CODEC_ID_MPEG4:  return cudaVideoCodec_MPEG4;
>  case AV_CODEC_ID_VC1:return cudaVideoCodec_VC1;
> +case AV_CODEC_ID_VP8:return cudaVideoCodec_VP8;
>  case AV_CODEC_ID_VP9:return cudaVideoCodec_VP9;
>  case AV_CODEC_ID_WMV3:   return cudaVideoCodec_VC1;
>  }
> diff --git a/libavcodec/nvdec_vp8.c b/libavcodec/nvdec_vp8.c
> new file mode 100644
> index 00..6fc0ac7ded
> --- /dev/null
> +++ b/libavcodec/nvdec_vp8.c
> @@ -0,0 +1,97 @@
> +/*
> + * VP8 HW decode acceleration through NVDEC
> + *
> + * Copyright (c) 2017 Philip Langdale
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
> + */
> +
> +#include "avcodec.h"
> +#include "nvdec.h"
> +#include "decode.h"
> +#include "internal.h"
> +#include "vp8.h"
> +
> +static unsigned char safe_get_ref_idx(VP8Frame *frame)
> +{
> +return frame ? ff_nvdec_get_ref_idx(frame->tf.f) : 255;
> +}
> +
> +static int nvdec_vp8_start_frame(AVCodecContext *avctx, const uint8_t 
> *buffer, uint32_t size)
> +{
> +VP8Context *h = avctx->priv_data;
> +
> +NVDECContext  *ctx = avctx->internal->hwaccel_priv_data;
> +   

[FFmpeg-devel] [PATCH] avcodec/error_resilience: Use atomic set on writing error_occurred per slice

2017-11-26 Thread Michael Niedermayer
This is more correct if multiple slices are handled in parallel

Signed-off-by: Michael Niedermayer 
---
 libavcodec/error_resilience.c | 6 +++---
 libavcodec/error_resilience.h | 2 +-
 libavcodec/h263dec.c  | 2 +-
 libavcodec/h264_slice.c   | 4 ++--
 libavcodec/mpegvideo.c| 3 ++-
 libavcodec/vc1dec.c   | 2 +-
 6 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index 8f172beca6..c9da30b84b 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -808,7 +808,7 @@ void ff_er_frame_start(ERContext *s)
 memset(s->error_status_table, ER_MB_ERROR | VP_START | ER_MB_END,
s->mb_stride * s->mb_height * sizeof(uint8_t));
 atomic_init(&s->error_count, 3 * s->mb_num);
-s->error_occurred = 0;
+atomic_init(&s->error_occurred, 0);
 }
 
 static int er_supported(ERContext *s)
@@ -864,7 +864,7 @@ void ff_er_add_slice(ERContext *s, int startx, int starty,
 }
 
 if (status & ER_MB_ERROR) {
-s->error_occurred = 1;
+atomic_store_explicit(&s->error_occurred, 1, memory_order_relaxed);
 atomic_store(&s->error_count, INT_MAX);
 }
 
@@ -892,7 +892,7 @@ void ff_er_add_slice(ERContext *s, int startx, int starty,
 
 prev_status &= ~ VP_START;
 if (prev_status != (ER_MV_END | ER_DC_END | ER_AC_END)) {
-s->error_occurred = 1;
+atomic_store_explicit(&s->error_occurred, 1, memory_order_relaxed);
 atomic_store(&s->error_count, INT_MAX);
 }
 }
diff --git a/libavcodec/error_resilience.h b/libavcodec/error_resilience.h
index 664a765659..5c000e13d1 100644
--- a/libavcodec/error_resilience.h
+++ b/libavcodec/error_resilience.h
@@ -62,7 +62,7 @@ typedef struct ERContext {
 ptrdiff_t b8_stride;
 
 atomic_int error_count;
-int error_occurred;
+atomic_int error_occurred;
 uint8_t *error_status_table;
 uint8_t *er_temp_buffer;
 int16_t *dc_val[3];
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index b222de793b..6fa8a657a4 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -637,7 +637,7 @@ retry:
 if (ff_h263_resync(s) < 0)
 break;
 if (prev_y * s->mb_width + prev_x < s->mb_y * s->mb_width + 
s->mb_x)
-s->er.error_occurred = 1;
+atomic_store_explicit(&s->er.error_occurred, 1, 
memory_order_relaxed);
 }
 
 if (s->msmpeg4_version < 4 && s->h263_pred)
diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index da76b9293f..5b37596d81 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -2480,7 +2480,7 @@ static void decode_finish_row(const H264Context *h, 
H264SliceContext *sl)
 
 ff_h264_draw_horiz_band(h, sl, top, height);
 
-if (h->droppable || sl->h264->slice_ctx[0].er.error_occurred)
+if (h->droppable || 
atomic_load_explicit(&sl->h264->slice_ctx[0].er.error_occurred, 
memory_order_relaxed))
 return;
 
 ff_thread_report_progress(&h->cur_pic_ptr->tf, top + height - 1,
@@ -2532,7 +2532,7 @@ static int decode_slice(struct AVCodecContext *avctx, 
void *arg)
 int prev_status = 
h->slice_ctx[0].er.error_status_table[h->slice_ctx[0].er.mb_index2xy[start_i - 
1]];
 prev_status &= ~ VP_START;
 if (prev_status != (ER_MV_END | ER_DC_END | ER_AC_END))
-h->slice_ctx[0].er.error_occurred = 1;
+atomic_store_explicit(&h->slice_ctx[0].er.error_occurred, 1, 
memory_order_relaxed);
 }
 }
 
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 2eb19c21bb..2581589bb7 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -49,6 +49,7 @@
 #include "thread.h"
 #include "wmv2.h"
 #include 
+#include 
 
 static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
int16_t *block, int n, int qscale)
@@ -2592,6 +2593,6 @@ void ff_set_qscale(MpegEncContext * s, int qscale)
 
 void ff_mpv_report_decode_progress(MpegEncContext *s)
 {
-if (s->pict_type != AV_PICTURE_TYPE_B && !s->partitioned_frame && 
!s->er.error_occurred)
+if (s->pict_type != AV_PICTURE_TYPE_B && !s->partitioned_frame && 
!atomic_load_explicit(&s->er.error_occurred, memory_order_relaxed))
 ff_thread_report_progress(&s->current_picture_ptr->tf, s->mb_y, 0);
 }
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 96b8bb5364..648c7370fe 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -1058,7 +1058,7 @@ static int vc1_decode_frame(AVCodecContext *avctx, void 
*data,
 get_bits_count(&s->gb), s->gb.size_in_bits);
 //  if (get_bits_count(&s->gb) > buf_size * 8)
 //  return -1;
-if(s->er.error_occurred && s->pict_type == AV_PICTURE_TYPE_B) {
+if(atomic_load_explicit(&s->er.error_occurred, memory_order_relaxed) 
&& s->pict_type == AV_PICTURE_TYPE_B) {
 ret = AVERROR_INVALIDDATA;
 

Re: [FFmpeg-devel] [PATCH v3 3/3] error_resilience: remove avpriv_atomic usage

2017-11-26 Thread Michael Niedermayer
On Sat, Nov 25, 2017 at 05:01:57PM +0000, Rostislav Pehlivanov wrote:
> Signed-off-by: Rostislav Pehlivanov 
> ---
>  libavcodec/error_resilience.c | 20 ++--
>  libavcodec/error_resilience.h |  3 ++-
>  2 files changed, 12 insertions(+), 11 deletions(-)
> 
> diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
> index 0c7f29d171..8f172beca6 100644
> --- a/libavcodec/error_resilience.c
> +++ b/libavcodec/error_resilience.c
> @@ -807,7 +807,7 @@ void ff_er_frame_start(ERContext *s)
>  
>  memset(s->error_status_table, ER_MB_ERROR | VP_START | ER_MB_END,
> s->mb_stride * s->mb_height * sizeof(uint8_t));
> -s->error_count= 3 * s->mb_num;
> +atomic_init(&s->error_count, 3 * s->mb_num);
>  s->error_occurred = 0;
>  }
>  
> @@ -852,20 +852,20 @@ void ff_er_add_slice(ERContext *s, int startx, int 
> starty,
>  mask &= ~VP_START;
>  if (status & (ER_AC_ERROR | ER_AC_END)) {
>  mask   &= ~(ER_AC_ERROR | ER_AC_END);
> -avpriv_atomic_int_add_and_fetch(&s->error_count, start_i - end_i - 
> 1);
> +atomic_fetch_add(&s->error_count, start_i - end_i - 1);
>  }
>  if (status & (ER_DC_ERROR | ER_DC_END)) {
>  mask   &= ~(ER_DC_ERROR | ER_DC_END);
> -avpriv_atomic_int_add_and_fetch(&s->error_count, start_i - end_i - 
> 1);
> +atomic_fetch_add(&s->error_count, start_i - end_i - 1);
>  }
>  if (status & (ER_MV_ERROR | ER_MV_END)) {
>  mask   &= ~(ER_MV_ERROR | ER_MV_END);
> -avpriv_atomic_int_add_and_fetch(&s->error_count, start_i - end_i - 
> 1);
> +atomic_fetch_add(&s->error_count, start_i - end_i - 1);
>  }
>  
>  if (status & ER_MB_ERROR) {
>  s->error_occurred = 1;
> -avpriv_atomic_int_set(&s->error_count, INT_MAX);
> +atomic_store(&s->error_count, INT_MAX);
>  }
>  
>  if (mask == ~0x7F) {
> @@ -878,7 +878,7 @@ void ff_er_add_slice(ERContext *s, int startx, int starty,
>  }
>  
>  if (end_i == s->mb_num)
> -avpriv_atomic_int_set(&s->error_count, INT_MAX);
> +atomic_store(&s->error_count, INT_MAX);
>  else {
>  s->error_status_table[end_xy] &= mask;
>  s->error_status_table[end_xy] |= status;
> @@ -893,7 +893,7 @@ void ff_er_add_slice(ERContext *s, int startx, int starty,
>  prev_status &= ~ VP_START;
>  if (prev_status != (ER_MV_END | ER_DC_END | ER_AC_END)) {
>  s->error_occurred = 1;
> -avpriv_atomic_int_set(&s->error_count, INT_MAX);
> +atomic_store(&s->error_count, INT_MAX);
>  }
>  }
>  }
> @@ -910,10 +910,10 @@ void ff_er_frame_end(ERContext *s)
>  
>  /* We do not support ER of field pictures yet,
>   * though it should not crash if enabled. */
> -if (!s->avctx->error_concealment || s->error_count == 0||
> +if (!s->avctx->error_concealment || !atomic_load(&s->error_count)  ||
>  s->avctx->lowres   ||
>  !er_supported(s)   ||
> -s->error_count == 3 * s->mb_width *
> +atomic_load(&s->error_count) == 3 * s->mb_width *
>(s->avctx->skip_top + s->avctx->skip_bottom)) {
>  return;
>  }
> @@ -927,7 +927,7 @@ void ff_er_frame_end(ERContext *s)
>  if (   mb_x == s->mb_width
>  && s->avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO
>  && (FFALIGN(s->avctx->height, 16)&16)
> -&& s->error_count == 3 * s->mb_width * (s->avctx->skip_top + 
> s->avctx->skip_bottom + 1)
> +&& atomic_load(&s->error_count) == 3 * s->mb_width * 
> (s->avctx->skip_top + s->avctx->skip_bottom + 1)
>  ) {
>  av_log(s->avctx, AV_LOG_DEBUG, "ignoring last missing slice\n");
>  return;

Looking at this again, I suspect these can use a more
relaxed memory ordering.

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Concerning the gods, I have no means of knowing whether they exist or not
or of what sort they may be, because of the obscurity of the subject, and
the brevity of human life -- Protagoras


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] avcodec: Implement vp8 nvdec hwaccel

2017-11-26 Thread Philip Langdale
Signed-off-by: Philip Langdale 
---
 Changelog  |  2 +-
 configure  |  2 ++
 libavcodec/Makefile|  1 +
 libavcodec/hwaccels.h  |  1 +
 libavcodec/nvdec.c |  1 +
 libavcodec/nvdec_vp8.c | 97 ++
 libavcodec/version.h   |  3 +-
 libavcodec/vp8.c   |  6 
 8 files changed, 111 insertions(+), 2 deletions(-)
 create mode 100644 libavcodec/nvdec_vp8.c

diff --git a/Changelog b/Changelog
index e3092e211f..4db1d57721 100644
--- a/Changelog
+++ b/Changelog
@@ -13,7 +13,7 @@ version <next>:
 - PCE support for extended channel layouts in the AAC encoder
 - native aptX encoder and decoder
 - Raw aptX muxer and demuxer
-- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2/4, VC1 and VP9 hwaccel 
decoding
+- NVIDIA NVDEC-accelerated H.264, HEVC, MPEG-1/2/4, VC1, VP8 and VP9 hwaccel 
decoding
 - Intel QSV-accelerated overlay filter
 - mcompand audio filter
 - acontrast audio filter
diff --git a/configure b/configure
index bc00b71489..e5fa61e83d 100755
--- a/configure
+++ b/configure
@@ -2748,6 +2748,8 @@ vc1_vaapi_hwaccel_deps="vaapi"
 vc1_vaapi_hwaccel_select="vc1_decoder"
 vc1_vdpau_hwaccel_deps="vdpau"
 vc1_vdpau_hwaccel_select="vc1_decoder"
+vp8_nvdec_hwaccel_deps="nvdec"
+vp8_nvdec_hwaccel_select="vp8_decoder"
 vp8_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferVP8"
 vp8_vaapi_hwaccel_select="vp8_decoder"
 vp9_d3d11va_hwaccel_deps="d3d11va DXVA_PicParams_VP9"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 640edfb590..ca7960cdf4 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -872,6 +872,7 @@ OBJS-$(CONFIG_VC1_NVDEC_HWACCEL)  += nvdec_vc1.o
 OBJS-$(CONFIG_VC1_QSV_HWACCEL)+= qsvdec_other.o
 OBJS-$(CONFIG_VC1_VAAPI_HWACCEL)  += vaapi_vc1.o
 OBJS-$(CONFIG_VC1_VDPAU_HWACCEL)  += vdpau_vc1.o
+OBJS-$(CONFIG_VP8_NVDEC_HWACCEL)  += nvdec_vp8.o
 OBJS-$(CONFIG_VP8_VAAPI_HWACCEL)  += vaapi_vp8.o
 OBJS-$(CONFIG_VP9_D3D11VA_HWACCEL)+= dxva2_vp9.o
 OBJS-$(CONFIG_VP9_DXVA2_HWACCEL)  += dxva2_vp9.o
diff --git a/libavcodec/hwaccels.h b/libavcodec/hwaccels.h
index cefd2b15be..420e2feeea 100644
--- a/libavcodec/hwaccels.h
+++ b/libavcodec/hwaccels.h
@@ -60,6 +60,7 @@ extern const AVHWAccel ff_vc1_dxva2_hwaccel;
 extern const AVHWAccel ff_vc1_nvdec_hwaccel;
 extern const AVHWAccel ff_vc1_vaapi_hwaccel;
 extern const AVHWAccel ff_vc1_vdpau_hwaccel;
+extern const AVHWAccel ff_vp8_nvdec_hwaccel;
 extern const AVHWAccel ff_vp8_vaapi_hwaccel;
 extern const AVHWAccel ff_vp9_d3d11va_hwaccel;
 extern const AVHWAccel ff_vp9_d3d11va2_hwaccel;
diff --git a/libavcodec/nvdec.c b/libavcodec/nvdec.c
index da4451a739..c7a02ff40f 100644
--- a/libavcodec/nvdec.c
+++ b/libavcodec/nvdec.c
@@ -58,6 +58,7 @@ static int map_avcodec_id(enum AVCodecID id)
 case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2;
 case AV_CODEC_ID_MPEG4:  return cudaVideoCodec_MPEG4;
 case AV_CODEC_ID_VC1:return cudaVideoCodec_VC1;
+case AV_CODEC_ID_VP8:return cudaVideoCodec_VP8;
 case AV_CODEC_ID_VP9:return cudaVideoCodec_VP9;
 case AV_CODEC_ID_WMV3:   return cudaVideoCodec_VC1;
 }
diff --git a/libavcodec/nvdec_vp8.c b/libavcodec/nvdec_vp8.c
new file mode 100644
index 0000000000..6fc0ac7ded
--- /dev/null
+++ b/libavcodec/nvdec_vp8.c
@@ -0,0 +1,97 @@
+/*
+ * VP8 HW decode acceleration through NVDEC
+ *
+ * Copyright (c) 2017 Philip Langdale
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "nvdec.h"
+#include "decode.h"
+#include "internal.h"
+#include "vp8.h"
+
+static unsigned char safe_get_ref_idx(VP8Frame *frame)
+{
+return frame ? ff_nvdec_get_ref_idx(frame->tf.f) : 255;
+}
+
+static int nvdec_vp8_start_frame(AVCodecContext *avctx, const uint8_t *buffer, 
uint32_t size)
+{
+VP8Context *h = avctx->priv_data;
+
+NVDECContext  *ctx = avctx->internal->hwaccel_priv_data;
+CUVIDPICPARAMS *pp = &ctx->pic_params;
+FrameDecodeData *fdd;
+NVDECFrame *cf;
+AVFrame *cur_frame = h->framep[VP56_FRAME_CURRENT]->tf.f;
+
+int ret;
+
+ret = ff_nvdec_start_frame(avctx, cur_frame);
+if (ret < 0)
+return ret;
+
+fdd = 

Re: [FFmpeg-devel] [PATCH 12/17] vaapi_decode: Ignore the profile when not useful

2017-11-26 Thread Mark Thompson
On 24/11/17 16:50, Philip Langdale wrote:
> On Fri, 24 Nov 2017 00:51:29 +0000
> Mark Thompson  wrote:
> 
>> Enables VP8 decoding - the decoder places the bitstream version
>> in the profile field, which we want to ignore.
>> ---
>>  libavcodec/vaapi_decode.c | 3 ++-
>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/libavcodec/vaapi_decode.c b/libavcodec/vaapi_decode.c
>> index d36ef906a2..572b3a40ac 100644
>> --- a/libavcodec/vaapi_decode.c
>> +++ b/libavcodec/vaapi_decode.c
>> @@ -324,7 +324,8 @@ static int
>> vaapi_decode_make_config(AVCodecContext *avctx, int profile_match = 0;
>>  if (avctx->codec_id != vaapi_profile_map[i].codec_id)
>>  continue;
>> -if (avctx->profile == vaapi_profile_map[i].codec_profile)
>> +if (avctx->profile == vaapi_profile_map[i].codec_profile ||
>> +vaapi_profile_map[i].codec_profile == FF_PROFILE_UNKNOWN)
>>  profile_match = 1;
>>  for (j = 0; j < profile_count; j++) {
>>  if (vaapi_profile_map[i].va_profile == profile_list[j]) {
> 
> First 12 parts look good.

First 12 applied; I have a bit more to do on MJPEG hwaccel for the rest.

Given how many small things got touched here it is quite likely that something 
has broken with this - I've tried to get some testing on all of the affected 
platforms, but do tell me if you find anything further and I'll try to fix it 
asap.

Thanks to everyone who commented on / reviewed this series :)

- Mark
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] Accurately describing ffmpeg-cvslog list [was: Re: [PATCH] Refactor Developer Docs, update dev list section (v2)]

2017-11-26 Thread Jim DeLaHunt

On 2017-11-26 03:42, Carl Eugen Hoyos wrote:


2017-11-26 9:31 GMT+01:00 Jim DeLaHunt :
[...]

+
  @subheading Subscribe to the ffmpeg-cvslog mailing list.
-It is important to do this as the diffs of all commits are sent there and
-reviewed by all the other developers. Bugs and possible improvements or
-general questions regarding commits are discussed there. We expect you to
-react if problems with your code are uncovered.
+Diffs of all commits are sent to the
+@uref{https://lists.ffmpeg.org/mailman/listinfo/ffmpeg-cvslog, ffmpeg-cvslog}
+mailing list. Some developers read this list to review all code base changes
+from all sources. Subscribing to this list is not mandatory, if
+all you want to do is submit a patch here and there.

I am (still) against this change.


OK, what specifically are you against?  More important, what are you in 
favour of?


It's difficult for me to read your mind via email.  Would you please 
read the existing section, "Subscribe to the ffmpeg-cvslog mailing 
list."[1], and give wording which to you describes accurately the 
current reality?


I'll observe that we have already heard other opinions:

 * Paul[2]: "Not at all. To be a contributor, it is not needed to
   subscribe to [ffmpeg-cvslog] list."
 * Timo[3]: "Usually if a discussion comes up the mail from cvslog is
   replied to on [ffmpeg-devel] list, so no actual discussion happens
   on the automatic cvslog list."

I don't have strong feelings on the policy for the -cvslog list, except 
that the current documentation is clearly inaccurate in describing the 
current reality. That is obvious even on a short exposure to the 
community.  "Bugs and possible improvements or general questions 
regarding commits" are /not/ discussed "there", on ffmpeg-cvslog. The 
statement "We expect you to react if problems with your code are 
uncovered." is correct, but more accurately describes behaviour on 
ffmpeg-devel, not ffmpeg-cvslog.



Sorry, Carl Eugen


It's great that you care.  What wording do you support?

Best regards,
 —Jim DeLaHunt

[1] 
[2] 

[3] 



--
--Jim DeLaHunt, j...@jdlh.com http://blog.jdlh.com/ (http://jdlh.com/)
  multilingual websites consultant

  355-1027 Davie St, Vancouver BC V6E 4L2, Canada
 Canada mobile +1-604-376-8953

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 3/3] avformat: deprecate AVFMT_FLAG_AUTO_BSFr

2017-11-26 Thread James Almer
On 11/26/2017 6:16 PM, Clément Bœsch wrote:
> On Sun, Nov 26, 2017 at 05:51:04PM -0300, James Almer wrote:
>> The bitstream filters inserted by this option should not be optional.
> 
> Will ffmpeg error out if it's built without the required bsf? (or is there
> a hard dep in the configure?)

Mmh, no, as is it will silently keep going.

I'll resend the patch making the muxers select the required bsfs.
Although admittedly there is no way to guarantee they will be available
even with that, since they are in a different library and you could
always dynamically load a different lavc.

Maybe we could emit a warning at runtime if the bsfs is required but not
compiled in?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] Policy on ffmpeg-devel list and contributions [was: Re: [PATCH] Refactor Developer Docs, update dev list section (v2)]

2017-11-26 Thread Jim DeLaHunt

On 2017-11-26 03:42, Carl Eugen Hoyos wrote:

2017-11-26 9:31 GMT+01:00 Jim DeLaHunt :

-@subsection Documentation/Other
+@section Documentation/Other
+@subheading Subscribe to the ffmpeg-devel mailing list.
+It is important to be subscribed to the

Of course it is important but I would much, much prefer
if people send their patches without being subscribed
than not sending their patches because it is implied
that they cannot send patches if they don't want to
subscribe
But if people are not interested in improving their contribution,
I would still prefer the patches to be sent.


So, how realistic is this concern about non-subscribers sending patches 
to ffmpeg-devel?  Does it actually happen? Can you point to, say, three 
patches in the last six months which were sent by non-subscribers to 
ffmpeg-devel and were applied to the code base?


Given how so many of the patches submitted by subscribers who know the 
unwritten rules are subjected to veto and revision, I would be surprised 
if many non-subscribers who are ignorant of the unwritten rules would 
produce something satisfactory.


That said, would your concern be addressed if I were to add this sentence:

   However, it is more important to the project that we receive your
   patch than that you be subscribed to the ffmpeg-devel list. If you
   have a patch, and don't want to subscribe and discuss the patch,
   then please do send it to the list.

(I am tempted to add a phrase like, "If you want to send your patch to 
ffmpeg-devel without discussion, as if  abandoning your baby on the 
steps of the orphanage, please do; one of the kind caregivers on the 
list may pick it up and find it a good home."  But this is probably too 
snarky to be appropriate.)



+@uref{https://lists.ffmpeg.org/mailman/listinfo/ffmpeg-devel, ffmpeg-devel}
+mailing list, because any patch you contribute must be sent there

No:
I believe it is very important that trivial patches are not sent
to the development mailing list - its volume is already so big
that some patches are sadly (!) forgotten.
Tell me more about the procedure for trivial patches. I have not seen 
this documented, and I don't know about it. Does this apply to 
occasional contributors, or only to trusted experienced ffmpeg project 
members with commit privileges to the repository?


The proposed text does not distinguish between occasional contributors 
and experienced project members. Maybe it should. I believe that the 
main audience of `doc/developer.html` is new and occasional 
contributors, because the experienced members will have internalised all 
the undocumented norms, and won't be referring to this page.


What revised wording do you propose for the above phrase "any patch you 
contribute must be sent there"?



+Also, this list is where bugs and possible improvements or

I believe this is misleading or even wrong.
Oh?  I took this wording from the existing 
 regarding the 
ffmpeg-cvslog list:
"Bugs and possible improvements or general questions regarding commits 
are discussed there."

What is misleading or wrong about this wording? What is your objection?

What alternate wording would you propose for this sentence, which 
describes why contributors should pay attention to the content of 
ffmpeg-devel?

+general questions regarding commits are discussed. That may be helpful
+information as you write your contribution. Finally, by being a list
+subscriber your contribution will be posted immediately to the list,
+without the moderation hold which messages from non-subscribers experience.
+


[...]

I think what is important about this new section is that it describes 
the policy and importance of the ffmpeg-devel list. It's interesting 
that the project had not put this into words in the current 
documentation. I'm trying to do that.  Carl Eugen, you are quick to 
object to what you don't like about proposed wording. I think it's 
especially important that you suggest wording that does capture what you 
do support. You obviously care.


Best regards,
 —Jim DeLaHunt

--
--Jim DeLaHunt, j...@jdlh.com http://blog.jdlh.com/ (http://jdlh.com/)
  multilingual websites consultant

  355-1027 Davie St, Vancouver BC V6E 4L2, Canada
 Canada mobile +1-604-376-8953

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Refactor Developer Docs, update dev list section (v2)

2017-11-26 Thread Paul B Mahol
On 11/26/17, Jim DeLaHunt  wrote:
> On 2017-11-26 04:38, Paul B Mahol wrote:
>> On 11/26/17, Nicolas George  wrote:
>>> Paul B Mahol (2017-11-26):
 Your opinions are irrelevant.
>>> # Be friendly and respectful towards others and third parties.
>>> # Treat others the way you yourself want to be treated.
>>>
>>> Please stop trampling the code of conduct.
>> Please stop being extremely rude and ignorant of other people's work.
> Paul, I am new on this list, but it seems to me that the only one being
> rude in this thread is you. I certainly hope this behaviour is not how
> you interpret "Be friendly and respectful towards others and third parties".

 I'm not first who started it, and since you are new to list you missed
 lots of flame wars...
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] Refactor Developer Docs, update dev list section (v2)

2017-11-26 Thread Jim DeLaHunt

On 2017-11-26 04:38, Paul B Mahol wrote:

On 11/26/17, Nicolas George  wrote:

Paul B Mahol (2017-11-26):

Your opinions are irrelevant.

# Be friendly and respectful towards others and third parties.
# Treat others the way you yourself want to be treated.

Please stop trampling the code of conduct.

Please stop being extremely rude and ignorant of other people's work.
Paul, I am new on this list, but it seems to me that the only one being 
rude in this thread is you. I certainly hope this behaviour is not how 
you interpret "Be friendly and respectful towards others and third parties".


--
--Jim DeLaHunt, j...@jdlh.com http://blog.jdlh.com/ (http://jdlh.com/)
  multilingual websites consultant

  355-1027 Davie St, Vancouver BC V6E 4L2, Canada
 Canada mobile +1-604-376-8953

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 3/3] avformat: deprecate AVFMT_FLAG_AUTO_BSFr

2017-11-26 Thread Clément Bœsch
On Sun, Nov 26, 2017 at 05:51:04PM -0300, James Almer wrote:
> The bitstream filters inserted by this option should not be optional.

Will ffmpeg error out if it's built without the required bsf? (or is there
a hard dep in the configure?)

-- 
Clément B.


signature.asc
Description: PGP signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] avcodec/utvideodec : add x86 SIMD (SSSE3) for gradient prediction

2017-11-26 Thread Martin Vignali
Hello,

The attached patches add SIMD (SSSE3) for gradient prediction
and a checkasm test.

Checkasm result (width = 1024) (kaby lake, macos 10.12)
add_gradient_pred_c: 1708.8
add_gradient_pred_ssse3: 533.0

Benchmark on a 3 min HD File in gradient (422)
without SIMD :
bench: utime=102.695s
bench: maxrss=102592512kB

with SIMD
bench: utime=91.712s
bench: maxrss=102543360kB



I will add an AVX2 version later (it needs more cleanup before submitting, and
will conflict with another patch, the add_left_pred AVX2 version).

This new DSP function can probably also be used by the magicyuv decoder.


I'm not sure about the best way in asm to load a uint8_t into all parts of an
xmm register.

Comments welcome.

Martin
Jokyo Images


0001-avcodec-utvideodec-add-SIMD-SSSE3-for-gradient_pred.patch
Description: Binary data


0002-checkasm-llviddsp-add-test-for-add_gradient_pred.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 3/3] avformat: deprecate AVFMT_FLAG_AUTO_BSF

2017-11-26 Thread James Almer
The bitstream filters inserted by this option should not be optional.
They are needed to successfully mux files in some cases, and to prevent
muxing broken files in others.

This is more in line with AVCodec.bsfs()

Signed-off-by: James Almer 
---
 libavformat/avformat.h  | 4 +++-
 libavformat/mux.c   | 3 ---
 libavformat/options_table.h | 6 --
 libavformat/version.h   | 3 +++
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 4f2798a871..e39e5e4ada 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1449,7 +1449,9 @@ typedef struct AVFormatContext {
 #endif
 #define AVFMT_FLAG_FAST_SEEK   0x80000 ///< Enable fast, but inaccurate seeks 
for some formats
 #define AVFMT_FLAG_SHORTEST   0x100000 ///< Stop muxing when the shortest 
stream stops.
-#define AVFMT_FLAG_AUTO_BSF   0x200000 ///< Add bitstream filters as requested 
by the muxer
+#if FF_API_LAVF_AUTO_BSF_FLAG
+#define AVFMT_FLAG_AUTO_BSF   0x200000 ///< Deprecated, does nothing.
+#endif
 
 /**
  * Maximum size of the data read from input for determining
diff --git a/libavformat/mux.c b/libavformat/mux.c
index ebb9102f11..382c20fc63 100644
--- a/libavformat/mux.c
+++ b/libavformat/mux.c
@@ -825,9 +825,6 @@ static int do_packet_auto_bsf(AVFormatContext *s, AVPacket 
*pkt) {
 AVStream *st = s->streams[pkt->stream_index];
 int i, ret;
 
-if (!(s->flags & AVFMT_FLAG_AUTO_BSF))
-return 1;
-
 if (s->oformat->check_bitstream) {
 if (!st->internal->bitstream_checked) {
 if ((ret = s->oformat->check_bitstream(s, pkt)) < 0)
diff --git a/libavformat/options_table.h b/libavformat/options_table.h
index b8fa47c6fd..8b1e6f3d18 100644
--- a/libavformat/options_table.h
+++ b/libavformat/options_table.h
@@ -39,7 +39,7 @@ static const AVOption avformat_options[] = {
 {"probesize", "set probing size", OFFSET(probesize), AV_OPT_TYPE_INT64, {.i64 
= 5000000 }, 32, INT64_MAX, D},
 {"formatprobesize", "number of bytes to probe file format", 
OFFSET(format_probesize), AV_OPT_TYPE_INT, {.i64 = PROBE_BUF_MAX}, 0, 
INT_MAX-1, D},
 {"packetsize", "set packet size", OFFSET(packet_size), AV_OPT_TYPE_INT, {.i64 
= DEFAULT }, 0, INT_MAX, E},
-{"fflags", NULL, OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = AVFMT_FLAG_AUTO_BSF 
}, INT_MIN, INT_MAX, D|E, "fflags"},
+{"fflags", NULL, OFFSET(flags), AV_OPT_TYPE_FLAGS, {.i64 = 0 }, INT_MIN, 
INT_MAX, D|E, "fflags"},
 {"flush_packets", "reduce the latency by flushing out packets immediately", 0, 
AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_FLUSH_PACKETS }, INT_MIN, INT_MAX, E, 
"fflags"},
 {"ignidx", "ignore index", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_IGNIDX }, 
INT_MIN, INT_MAX, D, "fflags"},
 {"genpts", "generate pts", 0, AV_OPT_TYPE_CONST, {.i64 = AVFMT_FLAG_GENPTS }, 
INT_MIN, INT_MAX, D, "fflags"},
@@ -57,7 +57,9 @@ static const AVOption avformat_options[] = {
 {"seek2any", "allow seeking to non-keyframes on demuxer level when supported", 
OFFSET(seek2any), AV_OPT_TYPE_BOOL, {.i64 = 0 }, 0, 1, D},
 {"bitexact", "do not write random/volatile data", 0, AV_OPT_TYPE_CONST, { .i64 
= AVFMT_FLAG_BITEXACT }, 0, 0, E, "fflags" },
 {"shortest", "stop muxing with the shortest stream", 0, AV_OPT_TYPE_CONST, { 
.i64 = AVFMT_FLAG_SHORTEST }, 0, 0, E, "fflags" },
-{"autobsf", "add needed bsfs automatically", 0, AV_OPT_TYPE_CONST, { .i64 = 
AVFMT_FLAG_AUTO_BSF }, 0, 0, E, "fflags" },
+#if FF_API_LAVF_AUTO_BSF_FLAG
+{"autobsf", "deprecated, does nothing", 0, AV_OPT_TYPE_CONST, { .i64 = 
AVFMT_FLAG_AUTO_BSF }, 0, 0, E, "fflags" },
+#endif
 {"analyzeduration", "specify how many microseconds are analyzed to probe the 
input", OFFSET(max_analyze_duration), AV_OPT_TYPE_INT64, {.i64 = 0 }, 0, 
INT64_MAX, D},
 {"cryptokey", "decryption key", OFFSET(key), AV_OPT_TYPE_BINARY, {.dbl = 0}, 
0, 0, D},
 {"indexmem", "max memory used for timestamp index (per stream)", 
OFFSET(max_index_size), AV_OPT_TYPE_INT, {.i64 = 1<<20 }, 0, INT_MAX, D},
diff --git a/libavformat/version.h b/libavformat/version.h
index feb1461c41..d2427dd875 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -82,6 +82,9 @@
 #ifndef FF_API_OLD_AVIO_EOF_0
 #define FF_API_OLD_AVIO_EOF_0   (LIBAVFORMAT_VERSION_MAJOR < 59)
 #endif
+#ifndef FF_API_LAVF_AUTO_BSF_FLAG
+#define FF_API_LAVF_AUTO_BSF_FLAG   (LIBAVFORMAT_VERSION_MAJOR < 59)
+#endif
 
 
 #ifndef FF_API_R_FRAME_RATE
-- 
2.15.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 2/3] avformat/mux: stop delaying writing the header

2017-11-26 Thread James Almer
Every bitstream filter behaves as intended now, so there's no need to
wait for the first packet of every stream.

Signed-off-by: James Almer 
---
 libavformat/avformat.h |  2 +-
 libavformat/internal.h |  6 -
 libavformat/mux.c  | 52 --
 libavformat/options_table.h|  2 +-
 libavformat/tests/fifo_muxer.c | 52 --
 tests/ref/fate/fifo-muxer-tst  |  1 -
 6 files changed, 12 insertions(+), 103 deletions(-)

diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index 322210fae0..4f2798a871 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -1449,7 +1449,7 @@ typedef struct AVFormatContext {
 #endif
 #define AVFMT_FLAG_FAST_SEEK   0x80000 ///< Enable fast, but inaccurate seeks 
for some formats
 #define AVFMT_FLAG_SHORTEST   0x100000 ///< Stop muxing when the shortest 
stream stops.
-#define AVFMT_FLAG_AUTO_BSF   0x200000 ///< Wait for packet data before 
writing a header, and add bitstream filters as requested by the muxer
+#define AVFMT_FLAG_AUTO_BSF   0x200000 ///< Add bitstream filters as requested 
by the muxer
 
 /**
  * Maximum size of the data read from input for determining
diff --git a/libavformat/internal.h b/libavformat/internal.h
index fcd47840a5..36a57214ce 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -120,12 +120,6 @@ struct AVFormatInternal {
 
 int avoid_negative_ts_use_pts;
 
-/**
- * Whether or not a header has already been written
- */
-int header_written;
-int write_header_ret;
-
 /**
  * Timestamp of the end of the shortest stream.
  */
diff --git a/libavformat/mux.c b/libavformat/mux.c
index b1244c67f3..ebb9102f11 100644
--- a/libavformat/mux.c
+++ b/libavformat/mux.c
@@ -458,25 +458,6 @@ static void flush_if_needed(AVFormatContext *s)
 }
 }
 
-static int write_header_internal(AVFormatContext *s)
-{
-if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
-avio_write_marker(s->pb, AV_NOPTS_VALUE, AVIO_DATA_MARKER_HEADER);
-if (s->oformat->write_header) {
-int ret = s->oformat->write_header(s);
-if (ret >= 0 && s->pb && s->pb->error < 0)
-ret = s->pb->error;
-s->internal->write_header_ret = ret;
-if (ret < 0)
-return ret;
-flush_if_needed(s);
-}
-s->internal->header_written = 1;
-if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
-avio_write_marker(s->pb, AV_NOPTS_VALUE, AVIO_DATA_MARKER_UNKNOWN);
-return 0;
-}
-
 int avformat_init_output(AVFormatContext *s, AVDictionary **options)
 {
 int ret = 0;
@@ -515,11 +496,18 @@ int avformat_write_header(AVFormatContext *s, 
AVDictionary **options)
 if ((ret = avformat_init_output(s, options)) < 0)
 return ret;
 
-if (!(s->oformat->check_bitstream && s->flags & AVFMT_FLAG_AUTO_BSF)) {
-ret = write_header_internal(s);
+if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
+avio_write_marker(s->pb, AV_NOPTS_VALUE, AVIO_DATA_MARKER_HEADER);
+if (s->oformat->write_header) {
+int ret = s->oformat->write_header(s);
+if (ret >= 0 && s->pb && s->pb->error < 0)
+ret = s->pb->error;
 if (ret < 0)
 goto fail;
+flush_if_needed(s);
 }
+if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
+avio_write_marker(s->pb, AV_NOPTS_VALUE, AVIO_DATA_MARKER_UNKNOWN);
 
 if (!s->internal->streams_initialized) {
 if ((ret = init_pts(s)) < 0)
@@ -739,12 +727,6 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
 }
 }
 
-if (!s->internal->header_written) {
-ret = s->internal->write_header_ret ? s->internal->write_header_ret : 
write_header_internal(s);
-if (ret < 0)
-goto fail;
-}
-
 if ((pkt->flags & AV_PKT_FLAG_UNCODED_FRAME)) {
 AVFrame *frame = (AVFrame *)pkt->data;
 av_assert0(pkt->size == UNCODED_FRAME_PACKET_SIZE);
@@ -760,8 +742,6 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt)
 ret = s->pb->error;
 }
 
-fail:
-
 if (ret < 0) {
 pkt->pts = pts_backup;
 pkt->dts = dts_backup;
@@ -894,11 +874,6 @@ int av_write_frame(AVFormatContext *s, AVPacket *pkt)
 
 if (!pkt) {
 if (s->oformat->flags & AVFMT_ALLOW_FLUSH) {
-if (!s->internal->header_written) {
-ret = s->internal->write_header_ret ? 
s->internal->write_header_ret : write_header_internal(s);
-if (ret < 0)
-return ret;
-}
 ret = s->oformat->write_packet(s, NULL);
 flush_if_needed(s);
 if (ret >= 0 && s->pb && s->pb->error < 0)
@@ -1282,14 +1257,8 @@ int av_write_trailer(AVFormatContext *s)
 goto fail;
 }
 
-if (!s->internal->header_written) {
-ret = s->internal->write_header_ret ? 

[FFmpeg-devel] [PATCH 1/3] ffmpeg: use avformat_init_output to initialize output files

2017-11-26 Thread James Almer
Postpone writing the header until the first output packet is ready to be
written.
This makes sure any stream parameter change that could take place while
processing an input frame will be taken into account when writing the
output file header.

Signed-off-by: James Almer 
---
 fftools/ffmpeg.c | 31 ++-
 fftools/ffmpeg.h |  3 +++
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/fftools/ffmpeg.c b/fftools/ffmpeg.c
index 0c16e75ab0..07476e88e7 100644
--- a/fftools/ffmpeg.c
+++ b/fftools/ffmpeg.c
@@ -700,7 +700,7 @@ static void write_packet(OutputFile *of, AVPacket *pkt, 
OutputStream *ost, int u
 ost->frame_number++;
 }
 
-if (!of->header_written) {
+if (!of->initialized) {
 AVPacket tmp_pkt = {0};
 /* the muxer is not initialized yet, buffer the packet */
 if (!av_fifo_space(ost->muxing_queue)) {
@@ -804,6 +804,17 @@ static void write_packet(OutputFile *of, AVPacket *pkt, 
OutputStream *ost, int u
   );
 }
 
+if (!of->header_written) {
+ret = avformat_write_header(s, &of->opts);
+if (ret < 0) {
+av_log(NULL, AV_LOG_ERROR,
+   "Could not write header for output file #%d: %s\n",
+   ost->file_index, av_err2str(ret));
+exit_program(1);
+}
+of->header_written = 1;
+}
+
 ret = av_interleaved_write_frame(s, pkt);
 if (ret < 0) {
 print_error("av_interleaved_write_frame()", ret);
@@ -2756,7 +2767,7 @@ static void print_sdp(void)
 AVFormatContext **avc;
 
 for (i = 0; i < nb_output_files; i++) {
-if (!output_files[i]->header_written)
+if (!output_files[i]->initialized)
 return;
 }
 
@@ -2947,16 +2958,26 @@ static int check_init_output_file(OutputFile *of, int 
file_index)
 
 of->ctx->interrupt_callback = int_cb;
 
-ret = avformat_write_header(of->ctx, &of->opts);
+ret = avformat_init_output(of->ctx, &of->opts);
 if (ret < 0) {
 av_log(NULL, AV_LOG_ERROR,
-   "Could not write header for output file #%d "
+   "Could not initialize output file #%d "
"(incorrect codec parameters ?): %s\n",
file_index, av_err2str(ret));
 return ret;
 }
 //assert_avoptions(of->opts);
-of->header_written = 1;
+of->initialized = ret;
+if (!ret) {
+ret = avformat_write_header(of->ctx, &of->opts);
+if (ret < 0) {
+av_log(NULL, AV_LOG_ERROR,
+   "Could not write header for output file #%d: %s\n",
+   file_index, av_err2str(ret));
+return ret;
+}
+of->initialized = of->header_written = 1;
+}
 
 av_dump_format(of->ctx, file_index, of->ctx->filename, 1);
 
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index e0977e1bf1..c46ffd8b03 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -571,6 +571,9 @@ typedef struct OutputFile {
 
 int shortest;
 
+// avformat_init_output() has been called for this file
+int initialized;
+// avformat_write_header() has been called for this file
 int header_written;
 } OutputFile;
 
-- 
2.15.0

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] hls demuxer: add option to defer parsing of variants

2017-11-26 Thread Rainer Hochecker

Variants are presented as programs and can be loaded later by
setting discard flags on the program. Currently Kodi chooses the
program that best matches the desired bit rate.

Rainer
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avfilter: add normalize filter

2017-11-26 Thread Richard Ling
Thanks Paul.

Thanks also to all reviewers for your comments! It's very helpful to have
extra sets of eyes to find my bugs.

Moritz is right, there is an unused #define; I will try to find time to
patch it. Or maybe Paul can remove it.

Regards
R.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] hls demuxer: add option to defer parsing of variants

2017-11-26 Thread Rainer Hochecker
2017-11-26 12:04 GMT+01:00 Steven Liu :
> 2017-11-26 18:46 GMT+08:00 Rainer Hochecker :
>> fixed mem leak pointed out by Steven
> Hi Rainer,
>
> I'm not sure that it is a memleak, but it looks like one when reading
> the code. This code was already in hls.c before this patch, but nobody
> has reported it as a memleak.
> If it is a memleak, maybe using the goto method is the better way, because
> the code below has checks for failed resource allocation. I will point
> it out based on your patch.
>>

Hi Steven,

As soon as you associate the probe buffer with the context, the context takes
care of the allocated resources. That's most likely the reason why pb was not
cleared here. It is assigned to the context right after this block.

Rainer
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] hls demuxer: add option to defer parsing of variants

2017-11-26 Thread Rainer Hochecker
>> +/*
>> + * If this is a live stream and this playlist looks like it is one 
>> segment
>> + * behind, try to sync it up so that every substream starts at the same
>> + * time position (so e.g. avformat_find_stream_info() will see packets 
>> from
>> + * all active streams within the first few seconds). This is not very 
>> generic,
>> + * though, as the sequence numbers are technically independent.
>> + */
>> +highest_cur_seq_no = 0;
>> +for (i = 0; i < c->n_playlists; i++) {
>> +struct playlist *pls = c->playlists[i];
>> +if (!pls->parsed)
>> +continue;
>> +if (pls->cur_seq_no > highest_cur_seq_no)
>> +highest_cur_seq_no = pls->cur_seq_no;
>> +}
>> +if (!pls->finished && pls->cur_seq_no == highest_cur_seq_no - 1 &&
>> +highest_cur_seq_no < pls->start_seq_no + pls->n_segments) {
>> +pls->cur_seq_no = highest_cur_seq_no;
>> +}
>> +
>> +pls->read_buffer = av_malloc(INITIAL_BUFFER_SIZE);
>> +if (!pls->read_buffer){
>> +ret = AVERROR(ENOMEM);
>> +avformat_free_context(pls->ctx);
>> +pls->ctx = NULL;
>> +return ret;
>> +}
>> +ffio_init_context(&pls->pb, pls->read_buffer, INITIAL_BUFFER_SIZE, 0, 
>> pls,
>> +  read_data, NULL, NULL);
>> +pls->pb.seekable = 0;
>> +ret = av_probe_input_buffer(&pls->pb, &in_fmt, pls->segments[0]->url,
>> +NULL, 0, 0);
>> +if (ret < 0) {
>> +/* Free the ctx - it isn't initialized properly at this point,
>> + * so avformat_close_input shouldn't be called. If
>> + * avformat_open_input fails below, it frees and zeros the
>> + * context, so it doesn't need any special treatment like this. */
>> +av_log(c->ctx, AV_LOG_ERROR, "Error when loading first segment 
>> '%s'\n", pls->segments[0]->url);
>> +avformat_free_context(pls->ctx);
>> +pls->ctx = NULL;
>> +return ret;
> Is that pls->read_buffer will memleak?
>
>

Yes, it looks like it. This is already an issue in the current code.
Nevertheless, I will fix it here.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] lavc: reset codec on receiving packet after EOF in compat_decode

2017-11-26 Thread Nicolas George
Marton Balint (2017-11-26):
> Okay, I am exaggerating a bit, but unconditionally returning AVERROR(ENOSYS)
> would be an even better incentive, no? :)

For invalid uses of the API that can be easily avoided by the
application (like not explicitly passing NULL to a function), a hard
crash is even better.

Regards,

-- 
  Nicolas George


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] lavc: reset codec on receiving packet after EOF in compat_decode

2017-11-26 Thread Marton Balint



On Sun, 26 Nov 2017, James Almer wrote:


On 11/26/2017 12:19 PM, Nicolas George wrote:

James Almer (2017-11-26):

The old decode API is not scheduled for removal right now probably
because 99% of decoders need to be ported.


I think this statement contains some confusion that is harmful to the
discussion.

There are two interfaces worth considering in this discussion: the
application -> library interface, i.e. the avcodec_decode_*dio()
functions, and the framework -> decoder interface, i.e. the decode /
receive_frame / ... callbacks.

When you are stating "because 99% of decoders need to be ported", you
are referring to the framework-decoder interface. On the other hand, the
misuse of the API that is at the origin of this thread is related to the
application-library interface.


Yes, my bad. Got the public API and the internal callbacks mixed. So
ignore that part.
Guess then that the functions did not get a removal schedule because
they are still too ubiquitous downstream.

My second paragraph stands, in any case. I consider this a good chance
to get downstreams to migrate.


Okay, I am exaggerating a bit, but unconditionally returning 
AVERROR(ENOSYS) would be an even better incentive, no? :)


We can blame API usage (we should rather blame unclear documentation), but 
no matter how we put it, with the change, we broke the user experience of 
two major projects. If fixing it (at least partially) is so easy, I still 
don't see why we should not do that.


People who still oppose this change, please respond.

Thanks,
Marton
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH] avfilter/vf_tile: add init_padding option

2017-11-26 Thread Nicolas George
Paul B Mahol (2017-11-24):
> Signed-off-by: Paul B Mahol 
> ---
>  doc/filters.texi  |  4 
>  libavfilter/vf_tile.c | 12 +++-
>  2 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 76929e4db5..11ce0482c2 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -14497,6 +14497,10 @@ is "black".
>  @item overlap
>  Set the number of frames to overlap when tiling several successive frames 
> together.
>  The value must be between @code{0} and @var{nb_frames - 1}.
> +

> +@item init_padding
> +Set the number of input frames to initially consume before displaying first 
> output frame.
> +The value must be between @code{0} and @var{nb_frames - 1}.

The documentation says that, but the code does the opposite.

>  @end table
>  
>  @subsection Examples
> diff --git a/libavfilter/vf_tile.c b/libavfilter/vf_tile.c
> index 7717ce12e7..c78fa611dd 100644
> --- a/libavfilter/vf_tile.c
> +++ b/libavfilter/vf_tile.c
> @@ -38,6 +38,7 @@ typedef struct TileContext {
>  unsigned margin;
>  unsigned padding;
>  unsigned overlap;
> +unsigned init_padding;
>  unsigned current;
>  unsigned nb_frames;
>  FFDrawContext draw;
> @@ -62,6 +63,8 @@ static const AVOption tile_options[] = {
>  { "color",   "set the color of the unused area", OFFSET(rgba_color), 
> AV_OPT_TYPE_COLOR, {.str = "black"}, .flags = FLAGS },
>  { "overlap", "set how many frames to overlap for each render", 
> OFFSET(overlap),
>  AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
> +{ "init_padding", " set how many frames to initially pad", 
> OFFSET(init_padding),
> +AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS },
>  { NULL }
>  };
>  
> @@ -99,6 +102,13 @@ static av_cold int init(AVFilterContext *ctx)
>  tile->overlap = tile->nb_frames - 1;
>  }
>  
> +if (tile->init_padding >= tile->nb_frames) {
> +av_log(ctx, AV_LOG_WARNING, "init_padding must be less than %d\n", 
> tile->nb_frames);

> +tile->current = 0;

Unnecessary and confusing.

> +} else {
> +tile->current = tile->init_padding;
> +}
> +
>  return 0;
>  }
>  
> @@ -201,7 +211,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame 
> *picref)
>  tile->out_ref->height = outlink->h;
>  
>  /* fill surface once for margin/padding */

> -if (tile->margin || tile->padding)
> +if (tile->margin || tile->padding || tile->init_padding != 0)

This change should only be applied to the first frame.

>  ff_fill_rectangle(&tile->draw, &tile->blank,
>tile->out_ref->data,
>tile->out_ref->linesize,

Regards,

-- 
  Nicolas George


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] avcodec/utvideodec : use dsp add_median_pred for second line

2017-11-26 Thread Martin Vignali
Hello,

Patch in attach

The DSP function needs 16-byte-aligned data, so this patch processes only
the start of the line in scalar code and calls the DSP function for the rest,
instead of processing the entire line in scalar code.


make fate-utvideo passes for me.

Martin


0001-avcodec-utvideodec-use-dsp-add_median_pred-for-secon.patch
Description: Binary data
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


  1   2   >