[FFmpeg-devel] [PATCH 10/15] avformat/matroskaenc: Avoid seeking when writing level 1 elements
Up until now, the writing process for level 1 elements (those elements for which CRC-32 elements are written by default) was this in case the output was seekable: Write the EBML ID, write an "unkown length" EBML number of the desired length, then write the element into a dynamic buffer, then write the dynamic buffer (after possible calculation and writing of the CRC-element), then seek back to the size element and overwrite the unknown-size element with the real size. The seeking and overwriting part has been eliminated by not writing the size initially. Signed-off-by: Andreas Rheinhardt --- libavformat/matroskaenc.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index 0be33419eb..2875552469 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -58,8 +58,9 @@ #include "libavcodec/internal.h" typedef struct ebml_master { -int64_t pos;///< absolute offset in the file where the master's elements start -int sizebytes; ///< how many bytes were reserved for the size +int64_t pos;///< absolute offset in the containing AVIOContext where the size field starts +///< for level 1 elements or else where the master's elements start +int sizebytes; ///< how many bytes were reserved/shall be used for the size } ebml_master; typedef struct mkv_seekhead_entry { @@ -316,6 +317,7 @@ static ebml_master start_ebml_master(AVIOContext *pb, uint32_t elementid, uint64_t expectedsize) { int bytes = expectedsize ? ebml_num_size(expectedsize) : 8; + put_ebml_id(pb, elementid); put_ebml_size_unknown(pb, bytes); return (ebml_master) {avio_tell(pb), bytes }; @@ -340,7 +342,10 @@ static int start_ebml_master_crc32(AVIOContext *pb, AVIOContext **dyn_cp, Matros return ret; if (pb->seekable & AVIO_SEEKABLE_NORMAL) { -*master = start_ebml_master(pb, elementid, expectedsize); +int bytes = expectedsize ? ebml_num_size(expectedsize) : 8; + +put_ebml_id(pb, elementid); +*master = (ebml_master) { avio_tell(pb), bytes }; if (mkv->write_crc) put_ebml_void(*dyn_cp, 6); /* Reserve space for CRC32 so position/size calculations using avio_tell() take it into account */ } else @@ -357,13 +362,13 @@ static void end_ebml_master_crc32(AVIOContext *pb, AVIOContext **dyn_cp, Matrosk if (pb->seekable & AVIO_SEEKABLE_NORMAL) { size = avio_close_dyn_buf(*dyn_cp, &buf); +put_ebml_num(pb, size, master.sizebytes); if (mkv->write_crc) { skip = 6; /* Skip reserved 6-byte long void element from the dynamic buffer. */ AV_WL32(crc, av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), UINT32_MAX, buf + skip, size - skip) ^ UINT32_MAX); put_ebml_binary(pb, EBML_ID_CRC32, crc, sizeof(crc)); } avio_write(pb, buf + skip, size - skip); -end_ebml_master(pb, master); } else { end_ebml_master(*dyn_cp, master); size = avio_close_dyn_buf(*dyn_cp, &buf); @@ -383,8 +388,8 @@ static void end_ebml_master_crc32_preliminary(AVIOContext *pb, AVIOContext **dyn uint8_t *buf; int size = avio_get_dyn_buf(*dyn_cp, &buf); +put_ebml_num(pb, size, master.sizebytes); avio_write(pb, buf, size); -end_ebml_master(pb, master); } static void put_xiph_size(AVIOContext *pb, int size) -- 2.21.0 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 10/15] avformat/matroskaenc: Avoid seeking when writing level 1 elements
Hendrik Leppkes: > On Tue, Apr 2, 2019 at 5:55 PM Andreas Rheinhardt via ffmpeg-devel > wrote: >> >> Hello, >> >> thanks for taking a look at this. >> >> Hendrik Leppkes: >>> On Tue, Apr 2, 2019 at 3:36 PM Andreas Rheinhardt via ffmpeg-devel >>> wrote: Up until now, the writing process for level 1 elements (those elements for which CRC-32 elements are written by default) was this in case the output was seekable: Write the EBML ID, write an "unkown length" EBML number of the desired length, then write the element into a dynamic buffer, then write the dynamic buffer (after possible calculation and writing of the CRC-element), then seek back to the size element and overwrite the unknown-size element with the real size. The seeking and overwriting part has been eliminated by not writing the size initially. >>> >>> I'm not particularly happy that it stops using start_ebml_master and >>> basically duplicates its functionality. This adds possible maintenance >>> in the future, or hidden bugs. >>> >> 1. start_ebml_master has the disadvantage that the length of the size >> element is chosen in advance; if one doesn't want to use another >> dynamic buffer for every master element (I'm thinking about the >> thousands of cues for which this would be mostly useless as they are >> written with one byte length fields anyway) and doesn't want to waste >> bytes for writing lots of elements (the level 1 elements), then these >> two functions need to be separated. > > I understand why its done, it just doesn't seem .. ideal. > > I didn't check the code entirely, but can this function theoretically > still be used for non-L1 elements after your changes, if one is > desired to have a CRC32 further down the EBML tree? > If not, it might be sensible to rename it. > It can still be used for writing elements at other levels. The reason that I speak of level 1 elements in the commit messages is that these are well-defined Matroska terms and that they (currently) coincide with the master elements that are affected by the patch. >> >> 6. Or do you mean by making "relative positions valid again" that you >> want that the offset in s->pb + the offset of an element in the >> dynamic buffer coincides with the position where this element will >> actually by written in the output? This would effectively mean that >> one can't use the minimum required number of bytes for the cluster's >> size field and instead would have to reserve so many that it always >> works. The only advantage of this would be that the "Writing block at >> offset" log message can really spit out the real output offset of the >> block (which is impossible if the length of the cluster's size field >> hasn't been chosen before writing the block). That's a very slim >> advantage to me. >> > > I forgot that relative positions within an element are supposed to be > counted only from the size element, so nevermind that part. > Yeah, I already thought so. - Andreas ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 10/15] avformat/matroskaenc: Avoid seeking when writing level 1 elements
On 4/2/2019 1:23 PM, Hendrik Leppkes wrote: > On Tue, Apr 2, 2019 at 5:55 PM Andreas Rheinhardt via ffmpeg-devel > wrote: >> >> Hello, >> >> thanks for taking a look at this. >> >> Hendrik Leppkes: >>> On Tue, Apr 2, 2019 at 3:36 PM Andreas Rheinhardt via ffmpeg-devel >>> wrote: Up until now, the writing process for level 1 elements (those elements for which CRC-32 elements are written by default) was this in case the output was seekable: Write the EBML ID, write an "unkown length" EBML number of the desired length, then write the element into a dynamic buffer, then write the dynamic buffer (after possible calculation and writing of the CRC-element), then seek back to the size element and overwrite the unknown-size element with the real size. The seeking and overwriting part has been eliminated by not writing the size initially. >>> >>> I'm not particularly happy that it stops using start_ebml_master and >>> basically duplicates its functionality. This adds possible maintenance >>> in the future, or hidden bugs. >>> >> 1. start_ebml_master has the disadvantage that the length of the size >> element is chosen in advance; if one doesn't want to use another >> dynamic buffer for every master element (I'm thinking about the >> thousands of cues for which this would be mostly useless as they are >> written with one byte length fields anyway) and doesn't want to waste >> bytes for writing lots of elements (the level 1 elements), then these >> two functions need to be separated. > > I understand why its done, it just doesn't seem .. ideal. > > I didn't check the code entirely, but can this function theoretically > still be used for non-L1 elements after your changes, if one is > desired to have a CRC32 further down the EBML tree? > If not, it might be sensible to rename it. Any element can have a CRC32 child element, but it's not required and i don't think it's worth trying. Only Level 1 elements should have one, and that's what's currently being done. > >> >> 6. Or do you mean by making "relative positions valid again" that you >> want that the offset in s->pb + the offset of an element in the >> dynamic buffer coincides with the position where this element will >> actually by written in the output? This would effectively mean that >> one can't use the minimum required number of bytes for the cluster's >> size field and instead would have to reserve so many that it always >> works. The only advantage of this would be that the "Writing block at >> offset" log message can really spit out the real output offset of the >> block (which is impossible if the length of the cluster's size field >> hasn't been chosen before writing the block). That's a very slim >> advantage to me. >> > > I forgot that relative positions within an element are supposed to be > counted only from the size element, so nevermind that part. > > - Hendrik > ___ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > https://ffmpeg.org/mailman/listinfo/ffmpeg-devel > > To unsubscribe, visit link above, or email > ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe". > ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 10/15] avformat/matroskaenc: Avoid seeking when writing level 1 elements
Hendrik Leppkes: > On Tue, Apr 2, 2019 at 3:36 PM Andreas Rheinhardt via ffmpeg-devel > wrote: >> @@ -383,8 +388,8 @@ static void >> end_ebml_master_crc32_preliminary(AVIOContext *pb, AVIOContext **dyn >> uint8_t *buf; >> int size = avio_get_dyn_buf(*dyn_cp, &buf); >> >> +put_ebml_num(pb, size, master.sizebytes); >> avio_write(pb, buf, size); >> -end_ebml_master(pb, master); >> } >> > > The indent here seems off, or did my mail client mangle something? > In patch 8 I removed a redundant check in end_ebml_master_crc32_preliminary, but I did not change indentation (that's done in patch #12). So put_ebml_num is written at the level of indentation that the rest will be after the indentation will have been fixed in #12. (Alternatively, one could write put_ebml_num one indentation level too high and shift it to the left in #12.) - Andreas ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 10/15] avformat/matroskaenc: Avoid seeking when writing level 1 elements
On Tue, Apr 2, 2019 at 3:36 PM Andreas Rheinhardt via ffmpeg-devel wrote: > @@ -383,8 +388,8 @@ static void end_ebml_master_crc32_preliminary(AVIOContext > *pb, AVIOContext **dyn > uint8_t *buf; > int size = avio_get_dyn_buf(*dyn_cp, &buf); > > +put_ebml_num(pb, size, master.sizebytes); > avio_write(pb, buf, size); > -end_ebml_master(pb, master); > } > The indent here seems off, or did my mail client mangle something? - Hendrik ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 10/15] avformat/matroskaenc: Avoid seeking when writing level 1 elements
On Tue, Apr 2, 2019 at 5:55 PM Andreas Rheinhardt via ffmpeg-devel wrote: > > Hello, > > thanks for taking a look at this. > > Hendrik Leppkes: > > On Tue, Apr 2, 2019 at 3:36 PM Andreas Rheinhardt via ffmpeg-devel > > wrote: > >> > >> Up until now, the writing process for level 1 elements (those elements > >> for which CRC-32 elements are written by default) was this in case the > >> output was seekable: Write the EBML ID, write an "unkown length" EBML > >> number of the desired length, then write the element into a dynamic > >> buffer, then write the dynamic buffer (after possible calculation and > >> writing of the CRC-element), then seek back to the size element and > >> overwrite the unknown-size element with the real size. The seeking and > >> overwriting part has been eliminated by not writing the size initially. > >> > > > > I'm not particularly happy that it stops using start_ebml_master and > > basically duplicates its functionality. This adds possible maintenance > > in the future, or hidden bugs. > > > 1. start_ebml_master has the disadvantage that the length of the size > element is chosen in advance; if one doesn't want to use another > dynamic buffer for every master element (I'm thinking about the > thousands of cues for which this would be mostly useless as they are > written with one byte length fields anyway) and doesn't want to waste > bytes for writing lots of elements (the level 1 elements), then these > two functions need to be separated. I understand why its done, it just doesn't seem .. ideal. I didn't check the code entirely, but can this function theoretically still be used for non-L1 elements after your changes, if one is desired to have a CRC32 further down the EBML tree? If not, it might be sensible to rename it. > > 6. Or do you mean by making "relative positions valid again" that you > want that the offset in s->pb + the offset of an element in the > dynamic buffer coincides with the position where this element will > actually by written in the output? This would effectively mean that > one can't use the minimum required number of bytes for the cluster's > size field and instead would have to reserve so many that it always > works. The only advantage of this would be that the "Writing block at > offset" log message can really spit out the real output offset of the > block (which is impossible if the length of the cluster's size field > hasn't been chosen before writing the block). That's a very slim > advantage to me. > I forgot that relative positions within an element are supposed to be counted only from the size element, so nevermind that part. - Hendrik ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
Re: [FFmpeg-devel] [PATCH 10/15] avformat/matroskaenc: Avoid seeking when writing level 1 elements
Hello, thanks for taking a look at this. Hendrik Leppkes: > On Tue, Apr 2, 2019 at 3:36 PM Andreas Rheinhardt via ffmpeg-devel > wrote: >> >> Up until now, the writing process for level 1 elements (those elements >> for which CRC-32 elements are written by default) was this in case the >> output was seekable: Write the EBML ID, write an "unkown length" EBML >> number of the desired length, then write the element into a dynamic >> buffer, then write the dynamic buffer (after possible calculation and >> writing of the CRC-element), then seek back to the size element and >> overwrite the unknown-size element with the real size. The seeking and >> overwriting part has been eliminated by not writing the size initially. >> > > I'm not particularly happy that it stops using start_ebml_master and > basically duplicates its functionality. This adds possible maintenance > in the future, or hidden bugs. > 1. start_ebml_master has the disadvantage that the length of the size element is chosen in advance; if one doesn't want to use another dynamic buffer for every master element (I'm thinking about the thousands of cues for which this would be mostly useless as they are written with one byte length fields anyway) and doesn't want to waste bytes for writing lots of elements (the level 1 elements), then these two functions need to be separated. > Additionally, before this change, the position of the current writer > points to the position where the dynamic block is actually going to be > written - after this, it'll write the size inbetween. > Especially considering this behavior is different between seek and > non-seek, I feel a bit uneasy if something might want to reference the > position - there is a specific warning about that in the CRC case, > which would apply here equally. 2. What do you mean by "current writer"? It's s->pb, isn't it? 3. You should know that the behaviour differs already between the seekable and the non-seekable cases (it is true that s->pb points to the position where the cluster's dynamic buffer is going to be written, but these are two different positions: once pointing to the EBML ID, once pointing to the first child of the cluster) and the relative offsets in the non-seekable case are wrong: That's because the ID and the length field (i.e. the unknown-length size field that will be overwritten later) are already included in the dynamic buffer in the non-seekable case, but according to the Matroska specifications, the relative offset count begins after the cluster's length field (i.e. the first element in the cluster (usually a CRC-32 or the cluster timestamp) has a relative offset of zero), so that the relative offset is currently off by the size of the EBML ID + size of the length field, i.e. 12 bytes. This is currently no grave problem given that the relative packet position is only used for the cues, which aren't written in non-seekable mode. But it is a problem for debug output. The "Writing block at offset" message is wrong and currently outputs the relative offset, relative to the beginning of the cluster EBML ID in the non-seekable mode, relative to the beginning of the cluster's elements in the seekable mode. See patch #14 (where I have just found a little bug that I'll fix in a minute, but which addresses said log message). 4. Given that my patchset actually unifies the seekable and non-seekable cases wrt writing clusters (and in particular, fixes the relative offsets in the non-seekable case), I don't see a reason to feel uneasy. I actually feel uneasy about the current state of affairs, hence this patchset. > > Maybe the last point can be improved if the size is being included in > the dynamic buffer and overwritten therein, as such making relative > positions valid again, even if different to the non-seek case? > 5. Currently, the offset in the dynamic buffer coincides with the relative offset according to the Matroska specifications if we are in seekable mode. The next patch in this series will extend this to the non-seekable mode as well. I don't see a reason why I should alter this by including the size in the dynamic buffer. This would not make relative offsets valid again, but would make them invalid (except if we subtracted the length of what has been reserved for the size field again...). 6. Or do you mean by making "relative positions valid again" that you want that the offset in s->pb + the offset of an element in the dynamic buffer coincides with the position where this element will actually by written in the output? This would effectively mean that one can't use the minimum required number of bytes for the cluster's size field and instead would have to reserve so many that it always works. The only advantage of this would be that the "Writing block at offset" log message can really spit out the real output offset of the block (which is impossible if the length of the cluster's size field hasn't been chosen before writing the block). That's a very slim advan
Re: [FFmpeg-devel] [PATCH 10/15] avformat/matroskaenc: Avoid seeking when writing level 1 elements
On Tue, Apr 2, 2019 at 3:36 PM Andreas Rheinhardt via ffmpeg-devel wrote: > > Up until now, the writing process for level 1 elements (those elements > for which CRC-32 elements are written by default) was this in case the > output was seekable: Write the EBML ID, write an "unkown length" EBML > number of the desired length, then write the element into a dynamic > buffer, then write the dynamic buffer (after possible calculation and > writing of the CRC-element), then seek back to the size element and > overwrite the unknown-size element with the real size. The seeking and > overwriting part has been eliminated by not writing the size initially. > I'm not particularly happy that it stops using start_ebml_master and basically duplicates its functionality. This adds possible maintenance in the future, or hidden bugs. Additionally, before this change, the position of the current writer points to the position where the dynamic block is actually going to be written - after this, it'll write the size inbetween. Especially considering this behavior is different between seek and non-seek, I feel a bit uneasy if something might want to reference the position - there is a specific warning about that in the CRC case, which would apply here equally. Maybe the last point can be improved if the size is being included in the dynamic buffer and overwritten therein, as such making relative positions valid again, even if different to the non-seek case? - Hendrik ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".
[FFmpeg-devel] [PATCH 10/15] avformat/matroskaenc: Avoid seeking when writing level 1 elements
Up until now, the writing process for level 1 elements (those elements for which CRC-32 elements are written by default) was this in case the output was seekable: Write the EBML ID, write an "unkown length" EBML number of the desired length, then write the element into a dynamic buffer, then write the dynamic buffer (after possible calculation and writing of the CRC-element), then seek back to the size element and overwrite the unknown-size element with the real size. The seeking and overwriting part has been eliminated by not writing the size initially. Signed-off-by: Andreas Rheinhardt --- libavformat/matroskaenc.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index 89bd8d5d44..5edbba89e8 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -58,8 +58,9 @@ #include "libavcodec/internal.h" typedef struct ebml_master { -int64_t pos;///< absolute offset in the file where the master's elements start -int sizebytes; ///< how many bytes were reserved for the size +int64_t pos;///< absolute offset in the containing AVIOContext where the size field starts +///< for level 1 elements or else where the master's elements start +int sizebytes; ///< how many bytes were reserved/shall be used for the size } ebml_master; typedef struct mkv_seekhead_entry { @@ -316,6 +317,7 @@ static ebml_master start_ebml_master(AVIOContext *pb, uint32_t elementid, uint64_t expectedsize) { int bytes = expectedsize ? ebml_num_size(expectedsize) : 8; + put_ebml_id(pb, elementid); put_ebml_size_unknown(pb, bytes); return (ebml_master) {avio_tell(pb), bytes }; @@ -340,7 +342,10 @@ static int start_ebml_master_crc32(AVIOContext *pb, AVIOContext **dyn_cp, Matros return ret; if (pb->seekable & AVIO_SEEKABLE_NORMAL) { -*master = start_ebml_master(pb, elementid, expectedsize); +int bytes = expectedsize ? ebml_num_size(expectedsize) : 8; + +put_ebml_id(pb, elementid); +*master = (ebml_master) { avio_tell(pb), bytes }; if (mkv->write_crc) put_ebml_void(*dyn_cp, 6); /* Reserve space for CRC32 so position/size calculations using avio_tell() take it into account */ } else @@ -357,13 +362,13 @@ static void end_ebml_master_crc32(AVIOContext *pb, AVIOContext **dyn_cp, Matrosk if (pb->seekable & AVIO_SEEKABLE_NORMAL) { size = avio_close_dyn_buf(*dyn_cp, &buf); +put_ebml_num(pb, size, master.sizebytes); if (mkv->write_crc) { skip = 6; /* Skip reserved 6-byte long void element from the dynamic buffer. */ AV_WL32(crc, av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), UINT32_MAX, buf + skip, size - skip) ^ UINT32_MAX); put_ebml_binary(pb, EBML_ID_CRC32, crc, sizeof(crc)); } avio_write(pb, buf + skip, size - skip); -end_ebml_master(pb, master); } else { end_ebml_master(*dyn_cp, master); size = avio_close_dyn_buf(*dyn_cp, &buf); @@ -383,8 +388,8 @@ static void end_ebml_master_crc32_preliminary(AVIOContext *pb, AVIOContext **dyn uint8_t *buf; int size = avio_get_dyn_buf(*dyn_cp, &buf); +put_ebml_num(pb, size, master.sizebytes); avio_write(pb, buf, size); -end_ebml_master(pb, master); } static void put_xiph_size(AVIOContext *pb, int size) -- 2.19.2 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org https://ffmpeg.org/mailman/listinfo/ffmpeg-devel To unsubscribe, visit link above, or email ffmpeg-devel-requ...@ffmpeg.org with subject "unsubscribe".