I have reworked the whole thing, the decoding function now takes care of allocating the buffer dynamically.
Please let me know what you think. David
>From c1aa780bfd1709adbbe9ed9b8f41dd51be304cb9 Mon Sep 17 00:00:00 2001 From: David Goldwich <[email protected]> Date: Tue, 13 Sep 2011 18:57:50 +0200 Subject: [PATCH] id3v2: add support for non-text and GEOB type tag frames This extends the ID3v2 parser to allow for reading of non-text (i.e. other than T***) meta tag frames providing a ff_id3v2_read_all() function. An additional data structure 'ID3v2ExtraMeta' is introduced for these tags since AVDictionary is string oriented and unsuitable for binary data. A parser for tag frames of type GEOB is implemented, which is needed to extract keyring information from encrypted OMA files. GEOB data is parsed into 'ID3v2ExtraMetaGEOB' data structures. The routine to decode characters from different encodings to UTF-8, formerly part of the read_ttag() function, is moved to its own function. Because some tag frames contain subparts of unknown length, the function is now also able to read until a null character is found. In addition, the function now takes care of allocating a buffer long enough to hold the decoded characters. Signed-off-by: David Goldwich <[email protected]> --- libavformat/id3v2.c | 231 +++++++++++++++++++++++++++++++++++++++++++------- libavformat/id3v2.h | 38 ++++++++- 2 files changed, 235 insertions(+), 34 deletions(-) diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c index be6c03b..7d5c311 100644 --- a/libavformat/id3v2.c +++ b/libavformat/id3v2.c @@ -19,6 +19,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <inttypes.h> #include "id3v2.h" #include "id3v1.h" #include "libavutil/avstring.h" @@ -59,29 +60,46 @@ static unsigned int get_size(AVIOContext *s, int len) return v; } -static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key) +/** + * Decode content to UTF-8 according to encoding type. Depending on the given + * taglen argument, either taglen bytes are read from the AVIOContext or until + * the first U+0000 character. The decoded buffer is always null terminated. + * + * @param dst Pointer where the address of the buffer with the decoded bytes is + * stored. Buffer must be freed by caller. + * @param dstlen Pointer to an int where the length of the decoded string + * is stored (in bytes, incl. null termination) + * @param taglen The length of the tag. If this is <0 then pb is read until a + * null character is found. Otherwise taglen bytes are read. + * @param key The tag frame key (for logging) + * @returns The number of bytes read from pb + */ +static unsigned int decode_str(AVFormatContext *s, AVIOContext *pb, int type, + char **dst, int *dstlen, int taglen, const char *key) { - char *q, dst[512]; - const char *val = NULL; - int len, dstlen = sizeof(dst) - 1; - unsigned genre; + int len = 0; + const int seeknull = (taglen < 0); + uint8_t tmp; + uint32_t ch = 1; + const uint64_t oldpos = avio_tell(pb); unsigned int (*get)(AVIOContext*) = avio_rb16; + AVIOContext *dynbuf; - dst[0] = 0; - if (taglen < 1) - return; + if (!dst) + return 0; - taglen--; /* account for encoding type byte */ + if (avio_open_dyn_buf(&dynbuf)) { + av_log(s, AV_LOG_ERROR, "Error opening memory stream\n"); + return 0; + } - switch (avio_r8(pb)) { /* encoding type */ + switch (type) { /* encoding type */ case ID3v2_ENCODING_ISO8859: - q = dst; - while (taglen-- && q - dst < dstlen - 7) { - uint8_t tmp; - PUT_UTF8(avio_r8(pb), tmp, *q++ = tmp;) + while (!seeknull && taglen-- || seeknull && ch) { + ch = avio_r8(pb); + PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);) } - *q = 0; break; case ID3v2_ENCODING_UTF16BOM: @@ -93,31 +111,54 @@ static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const cha break; default: av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key); - return; + return 2; } // fall-through case ID3v2_ENCODING_UTF16BE: - q = dst; - while (taglen > 1 && q - dst < dstlen - 7) { - uint32_t ch; - uint8_t tmp; - - GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(pb) : 0), break;) - PUT_UTF8(ch, tmp, *q++ = tmp;) + while (!seeknull && taglen > 1 || seeknull && ch) { + GET_UTF16(ch, (seeknull || (taglen -= 2) >= 0 ? get(pb) : 0), break;) + PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);) } - *q = 0; break; case ID3v2_ENCODING_UTF8: - len = FFMIN(taglen, dstlen); - avio_read(pb, dst, len); - dst[len] = 0; + while (!seeknull && taglen-- || seeknull && ch) { + ch = avio_r8(pb); + avio_w8(dynbuf, ch); + } break; default: av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s.\n", key); } + if (ch) + avio_w8(dynbuf, 0); + + len = avio_close_dyn_buf(dynbuf, (uint8_t **)dst); + if (dstlen) + *dstlen = len; + + return avio_tell(pb) - oldpos; +} + +/** + * Parse a text tag. + */ +static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const char *key) +{ + char *dst; + const char *val = NULL; + int len, dstlen; + unsigned genre; + + if (taglen < 1) + return; + + taglen--; /* account for encoding type byte */ + + decode_str(s, pb, avio_r8(pb), &dst, &dstlen, taglen, key); + if (!(strcmp(key, "TCON") && strcmp(key, "TCO")) && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 1) && genre <= ID3v1_GENRE_MAX) @@ -134,6 +175,75 @@ static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const cha if (val) av_dict_set(&s->metadata, key, val, AV_DICT_DONT_OVERWRITE); + + av_free(dst); +} + +/** + * Parse GEOB tag into a ID3v2ExtraMetaGEOB struct. + */ +static void read_geobtag(AVFormatContext *s, AVIOContext *pb, int taglen, char *tag, ID3v2ExtraMeta **extra_meta) +{ + ID3v2ExtraMetaGEOB *geob_data; + ID3v2ExtraMeta *new_extra; + char encoding; + unsigned int len; + + if (taglen < 1) + return; + + geob_data = av_mallocz(sizeof(ID3v2ExtraMetaGEOB)); + if (!geob_data) { + av_log(s, AV_LOG_ERROR, "Failed to alloc %"PRIuPTR" bytes\n", sizeof(ID3v2ExtraMetaGEOB)); + return; + } + + new_extra = av_mallocz(sizeof(ID3v2ExtraMeta)); + if (!new_extra) { + av_log(s, AV_LOG_ERROR, "Failed to alloc %"PRIuPTR" bytes\n", sizeof(ID3v2ExtraMeta)); + return; + } + + /* read encoding type byte */ + encoding = avio_r8(pb); + taglen--; + + /* read MIME type (always ISO-8859) and save as UTF-8 */ + taglen -= decode_str(s, pb, ID3v2_ENCODING_ISO8859, &geob_data->mime_type, NULL, -1, "GEOB"); + + /* read file name and save as UTF-8 */ + taglen -= decode_str(s, pb, encoding, &geob_data->file_name, NULL, -1, "GEOB"); + + /* read content description and save as UTF-8 */ + taglen -= decode_str(s, pb, encoding, &geob_data->description, NULL, -1, "GEOB"); + + /* save encapsulated binary data */ + geob_data->data = av_malloc(taglen); + if (!geob_data->data) { + av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", taglen); + return; + } + if ((len = avio_read(pb, geob_data->data, taglen)) < taglen) + av_log(s, AV_LOG_WARNING, "Error reading GEOB frame, data truncated.\n"); + geob_data->datasize = len; + + /* add data to the list */ + new_extra->tag = "GEOB"; + new_extra->data = geob_data; + new_extra->next = *extra_meta; + *extra_meta = new_extra; +} + +/** + * Free GEOB type extra metadata. + */ +static void free_geobtag(ID3v2ExtraMetaGEOB *geob) +{ + av_free(geob->mime_type); + av_free(geob->file_name); + av_free(geob->description); + av_free(geob->data); + av_free(geob); } static int is_number(const char *str) @@ -182,7 +292,28 @@ finish: av_dict_set(m, "date", date, 0); } -static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags) +/** + * Get the corresponding function to parse a certain tag type or free the parsed data. + * @param isv34 Determines if v2.2 or v2.3/4 strings are used + * @param read Determines if the function to read or to free data is returned + * @return If read is non-zero, a pointer to the parse function is returned. If read is zero a pointer to the free function is returned. If no function for the tag could be found, NULL is returned. + */ +static void *get_extra_meta_func(const char *tag, int isv34, int read) +{ +#define funcs ff_id3v2_extra_meta_funcs + int i = 0; + while (funcs[i].tag3) { + if (!memcmp(tag, + (isv34 ? funcs[i].tag4 : funcs[i].tag3), + (isv34 ? 4 : 3))) + return (read ? funcs[i].read : funcs[i].free); + i++; + } + return NULL; +#undef funcs +} + +static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t flags, ID3v2ExtraMeta **extra_meta) { int isv34, tlen, unsync; char tag[5]; @@ -190,8 +321,10 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t int taghdrlen; const char *reason = NULL; AVIOContext pb; + AVIOContext *pbx; unsigned char *buffer = NULL; int buffer_size = 0; + void (*extra_func)(AVFormatContext*, AVIOContext*, int, char*, ID3v2ExtraMeta**) = NULL; switch (version) { case 2: @@ -252,7 +385,8 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t if (tflags & (ID3v2_FLAG_ENCRYPTION | ID3v2_FLAG_COMPRESSION)) { av_log(s, AV_LOG_WARNING, "Skipping encrypted/compressed ID3v2 frame %s.\n", tag); avio_skip(s->pb, tlen); - } else if (tag[0] == 'T') { + /* check for text tag or supported special meta tag */ + } else if (tag[0] == 'T' || (extra_meta && (extra_func = get_extra_meta_func(tag, isv34, 1)))) { if (unsync || tunsync) { int i, j; av_fast_malloc(&buffer, &buffer_size, tlen); @@ -268,10 +402,17 @@ static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, uint8_t } } ffio_init_context(&pb, buffer, j, 0, NULL, NULL, NULL, NULL); - read_ttag(s, &pb, j, tag); + tlen = j; + pbx = &pb; // read from sync buffer } else { - read_ttag(s, s->pb, tlen, tag); + pbx = s->pb; // read straight from input } + if (tag[0] == 'T') + /* parse text tag */ + read_ttag(s, pbx, tlen, tag); + else + /* parse special meta tag */ + extra_func(s, pbx, tlen, tag, extra_meta); } else if (!tag[0]) { if (tag[1]) @@ -295,7 +436,7 @@ seek: return; } -void ff_id3v2_read(AVFormatContext *s, const char *magic) +void ff_id3v2_read_all(AVFormatContext *s, const char *magic, ID3v2ExtraMeta **extra_meta) { int len, ret; uint8_t buf[ID3v2_HEADER_SIZE]; @@ -315,7 +456,7 @@ void ff_id3v2_read(AVFormatContext *s, const char *magic) ((buf[7] & 0x7f) << 14) | ((buf[8] & 0x7f) << 7) | (buf[9] & 0x7f); - ff_id3v2_parse(s, len, buf[3], buf[5]); + ff_id3v2_parse(s, len, buf[3], buf[5], extra_meta); } else { avio_seek(s->pb, off, SEEK_SET); } @@ -326,6 +467,30 @@ void ff_id3v2_read(AVFormatContext *s, const char *magic) merge_date(&s->metadata); } +void ff_id3v2_read(AVFormatContext *s, const char *magic) +{ + ff_id3v2_read_all(s, magic, NULL); +} + +void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta) +{ + ID3v2ExtraMeta *current = *extra_meta, *next; + void (*free_func)(ID3v2ExtraMeta*); + + while (current) { + if ((free_func = get_extra_meta_func(current->tag, 1, 0))) + free_func(current->data); + next = current->next; + av_freep(¤t); + current = next; + } +} + +const struct ID3v2EMFunc ff_id3v2_extra_meta_funcs[] = { + { "GEO", "GEOB", read_geobtag, free_geobtag }, + { NULL } +}; + const AVMetadataConv ff_id3v2_34_metadata_conv[] = { { "TALB", "album"}, { "TCOM", "composer"}, diff --git a/libavformat/id3v2.h b/libavformat/id3v2.h index 3e0e65a..8e50bcf 100644 --- a/libavformat/id3v2.h +++ b/libavformat/id3v2.h @@ -45,6 +45,27 @@ enum ID3v2Encoding { ID3v2_ENCODING_UTF8 = 3, }; +typedef struct ID3v2ExtraMeta { + const char *tag; + void *data; + struct ID3v2ExtraMeta *next; +} ID3v2ExtraMeta; + +typedef struct ID3v2ExtraMetaGEOB { + uint32_t datasize; + char *mime_type; + char *file_name; + char *description; + uint8_t *data; +} ID3v2ExtraMetaGEOB; + +typedef struct ID3v2EMFunc { + const char *tag3; + const char *tag4; + void (*read)(AVFormatContext*, AVIOContext*, int, char*, ID3v2ExtraMeta **); + void (*free)(); +} ID3v2EMFunc; + /** * Detect ID3v2 Header. * @param buf must be ID3v2_HEADER_SIZE byte long @@ -61,10 +82,25 @@ int ff_id3v2_match(const uint8_t *buf, const char *magic); int ff_id3v2_tag_len(const uint8_t *buf); /** - * Read an ID3v2 tag + * Read an ID3v2 tag (text tags only) */ void ff_id3v2_read(AVFormatContext *s, const char *magic); +/** + * Read an ID3v2 tag, including supported extra metadata (currently only GEOB) + * @param extra_meta If not NULL, extra metadata is parsed into a list of + * ID3v2ExtraMeta structs and *extra_meta points to the head of the list + */ +void ff_id3v2_read_all(AVFormatContext *s, const char *magic, ID3v2ExtraMeta **extra_meta); + +/** + * Free memory allocated parsing special (non-text) metadata. + * @param extra_meta Pointer to a pointer to the head of a ID3v2ExtraMeta list, *extra_meta is set to NULL. + */ +void ff_id3v2_free_extra_meta(ID3v2ExtraMeta **extra_meta); + +extern const ID3v2EMFunc ff_id3v2_extra_meta_funcs[]; + extern const AVMetadataConv ff_id3v2_34_metadata_conv[]; extern const AVMetadataConv ff_id3v2_4_metadata_conv[]; extern const AVMetadataConv ff_id3v2_2_metadata_conv[]; -- 1.7.2.5
_______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
