Re: [FFmpeg-devel] [PATCH 3/4] matroska: redo seekhead handling

2015-02-09 Thread Michael Niedermayer
On Mon, Feb 09, 2015 at 08:39:00PM +0100, wm4 wrote:
 In particular, this reads chained seekheads. This makes seeking faster
 in files which have the index indirectly linked through 2 seekheads.

do you have a affected example (for testing) ?

thanks

[...]
-- 
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Avoid a single point of failure, be that a person or equipment.


signature.asc
Description: Digital signature
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 3/4] matroska: redo seekhead handling

2015-02-09 Thread Carl Eugen Hoyos
Michael Niedermayer michaelni at gmx.at writes:

 do you have a affected example (for testing) ?

I suspect this is ticket #2263, please mention 
it in the commit message.

Carl Eugen

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 3/4] matroska: redo seekhead handling

2015-02-09 Thread wm4
In particular, this reads chained seekheads. This makes seeking faster
in files which have the index indirectly linked through 2 seekheads.

As a side-effect, this warns when reading level-1 (toplevel) elements
multiple times (other than seekheads, clusters, and void/crc). Such
elements are not valid and likely break everything.
---
 libavformat/matroskadec.c | 125 --
 1 file changed, 88 insertions(+), 37 deletions(-)

diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index 2957536..7af7845 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -71,6 +71,7 @@ typedef enum {
 EBML_UTF8,
 EBML_BIN,
 EBML_NEST,
+EBML_LEVEL1,
 EBML_PASS,
 EBML_STOP,
 EBML_SINT,
@@ -253,6 +254,12 @@ typedef struct {
 } MatroskaCluster;
 
 typedef struct {
+uint64_t id;
+uint64_t pos;
+int parsed;
+} MatroskaLevel1Element;
+
+typedef struct {
 AVFormatContext *ctx;
 
 /* EBML stuff */
@@ -290,6 +297,10 @@ typedef struct {
 /* File has a CUES element, but we defer parsing until it is needed. */
 int cues_parsing_deferred;
 
+/* Level1 elements and whether they were read yet */
+MatroskaLevel1Element level1_elems[64];
+int num_level1_elems;
+
 int current_cluster_num_blocks;
 int64_t current_cluster_pos;
 MatroskaCluster current_cluster;
@@ -513,13 +524,13 @@ static EbmlSyntax matroska_seekhead[] = {
 };
 
 static EbmlSyntax matroska_segment[] = {
-{ MATROSKA_ID_INFO,EBML_NEST, 0, 0, { .n = matroska_info } },
-{ MATROSKA_ID_TRACKS,  EBML_NEST, 0, 0, { .n = matroska_tracks } },
-{ MATROSKA_ID_ATTACHMENTS, EBML_NEST, 0, 0, { .n = matroska_attachments } 
},
-{ MATROSKA_ID_CHAPTERS,EBML_NEST, 0, 0, { .n = matroska_chapters } },
-{ MATROSKA_ID_CUES,EBML_NEST, 0, 0, { .n = matroska_index } },
-{ MATROSKA_ID_TAGS,EBML_NEST, 0, 0, { .n = matroska_tags } },
-{ MATROSKA_ID_SEEKHEAD,EBML_NEST, 0, 0, { .n = matroska_seekhead } },
+{ MATROSKA_ID_INFO,EBML_LEVEL1, 0, 0, { .n = matroska_info } },
+{ MATROSKA_ID_TRACKS,  EBML_LEVEL1, 0, 0, { .n = matroska_tracks } },
+{ MATROSKA_ID_ATTACHMENTS, EBML_LEVEL1, 0, 0, { .n = matroska_attachments 
} },
+{ MATROSKA_ID_CHAPTERS,EBML_LEVEL1, 0, 0, { .n = matroska_chapters } },
+{ MATROSKA_ID_CUES,EBML_LEVEL1, 0, 0, { .n = matroska_index } },
+{ MATROSKA_ID_TAGS,EBML_LEVEL1, 0, 0, { .n = matroska_tags } },
+{ MATROSKA_ID_SEEKHEAD,EBML_LEVEL1, 0, 0, { .n = matroska_seekhead } },
 { MATROSKA_ID_CLUSTER, EBML_STOP },
 { 0 }
 };
@@ -919,6 +930,42 @@ static int ebml_parse_nest(MatroskaDemuxContext *matroska, 
EbmlSyntax *syntax,
 return res;
 }
 
+/*
+ * Allocate and return the entry for the level1 element with the given ID. If
+ * an entry already exists, return the existing entry.
+ */
+static MatroskaLevel1Element *matroska_find_level1_elem(MatroskaDemuxContext 
*matroska,
+uint32_t id)
+{
+int i;
+MatroskaLevel1Element *elem;
+
+// Some files link to all clusters; useless.
+if (id == MATROSKA_ID_CLUSTER)
+return NULL;
+
+// There can be multiple seekheads.
+if (id != MATROSKA_ID_SEEKHEAD) {
+for (i = 0; i  matroska-num_level1_elems; i++) {
+if (matroska-level1_elems[i].id == id)
+return matroska-level1_elems[i];
+}
+}
+
+// Only a completely broken file would have more elements.
+// It also provides a low-effort way to escape from circular seekheads
+// (every iteration will add a level1 entry).
+if (matroska-num_level1_elems = FF_ARRAY_ELEMS(matroska-level1_elems)) {
+av_log(matroska-ctx, AV_LOG_ERROR, Too many level1 elements or 
circular seekheads.\n);
+return NULL;
+}
+
+elem = matroska-level1_elems[matroska-num_level1_elems++];
+*elem = (MatroskaLevel1Element){.id = id};
+
+return elem;
+}
+
 static int ebml_parse_elem(MatroskaDemuxContext *matroska,
EbmlSyntax *syntax, void *data)
 {
@@ -937,6 +984,7 @@ static int ebml_parse_elem(MatroskaDemuxContext *matroska,
 uint64_t length;
 int res;
 void *newelem;
+MatroskaLevel1Element *level1_elem;
 
 data = (char *) data + syntax-data_offset;
 if (syntax-list_elem_size) {
@@ -979,11 +1027,20 @@ static int ebml_parse_elem(MatroskaDemuxContext 
*matroska,
 case EBML_BIN:
 res = ebml_read_binary(pb, length, data);
 break;
+case EBML_LEVEL1:
 case EBML_NEST:
 if ((res = ebml_read_master(matroska, length))  0)
 return res;
 if (id == MATROSKA_ID_SEGMENT)
 matroska-segment_start = avio_tell(matroska-ctx-pb);
+if (id == MATROSKA_ID_CUES)
+matroska-cues_parsing_deferred = 0;
+if (syntax-type == EBML_LEVEL1 
+(level1_elem =