On Tue, 13 Sep 2011 21:03:06 +0200, David Goldwich <[email protected]> 
wrote:
Non-text part: multipart/mixed
> I have reworked the whole thing, the decoding function now takes care of
> allocating the buffer dynamically.
> 
> Please let me know what you think.
> 
> David
> From c1aa780bfd1709adbbe9ed9b8f41dd51be304cb9 Mon Sep 17 00:00:00 2001
> From: David Goldwich <[email protected]>
> Date: Tue, 13 Sep 2011 18:57:50 +0200
> Subject: [PATCH] id3v2: add support for non-text and GEOB type tag frames
> 
> This extends the ID3v2 parser to allow for reading of non-text (i.e.
> other than T***) meta tag frames providing a ff_id3v2_read_all()
> function. An additional data structure 'ID3v2ExtraMeta' is introduced
> for these tags since AVDictionary is string oriented and unsuitable
> for binary data.
> 
> A parser for tag frames of type GEOB is implemented, which is needed
> to extract keyring information from encrypted OMA files. GEOB data
> is parsed into 'ID3v2ExtraMetaGEOB' data structures.
> 
> The routine to decode characters from different encodings to UTF-8,
> formerly part of the read_ttag() function, is moved to its own
> function. Because some tag frames contain subparts of unknown length,
> the function is now also able to read until a null character is found.
> In addition, the function now takes care of allocating a buffer long
> enough to hold the decoded characters.
> 
> Signed-off-by: David Goldwich <[email protected]>
> ---
>  libavformat/id3v2.c |  231 +++++++++++++++++++++++++++++++++++++++++++-------
>  libavformat/id3v2.h |   38 ++++++++-
>  2 files changed, 235 insertions(+), 34 deletions(-)
> 
> diff --git a/libavformat/id3v2.c b/libavformat/id3v2.c
> index be6c03b..7d5c311 100644
> --- a/libavformat/id3v2.c
> +++ b/libavformat/id3v2.c
> @@ -19,6 +19,7 @@
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 
> USA
>   */
>  
> +#include <inttypes.h>
>  #include "id3v2.h"
>  #include "id3v1.h"
>  #include "libavutil/avstring.h"
> @@ -59,29 +60,46 @@ static unsigned int get_size(AVIOContext *s, int len)
>      return v;
>  }
>  
> -static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const 
> char *key)
> +/**
> + * Decode content to UTF-8 according to encoding type. Depending on the given
> + * taglen argument, either taglen bytes are read from the AVIOContext or 
> until
> + * the first U+0000 character. The decoded buffer is always null terminated.
> + *
> + * @param dst Pointer where the address of the buffer with the decoded bytes 
> is
> + * stored. Buffer must be freed by caller.
> + * @param dstlen Pointer to an int where the length of the decoded string
> + * is stored (in bytes, incl. null termination)
> + * @param taglen The length of the tag. If this is <0 then pb is read until a
> + * null character is found. Otherwise taglen bytes are read.
> + * @param key The tag frame key (for logging)
> + * @returns The number of bytes read from pb
> + */
> +static unsigned int decode_str(AVFormatContext *s, AVIOContext *pb, int type,

'type' is not very descriptive here, can you change it to 'encoding'?

> +                               char **dst, int *dstlen, int taglen, const 
> char *key)
>  {
> -    char *q, dst[512];
> -    const char *val = NULL;
> -    int len, dstlen = sizeof(dst) - 1;
> -    unsigned genre;
> +    int len = 0;
> +    const int seeknull = (taglen < 0);
> +    uint8_t tmp;
> +    uint32_t ch = 1;
> +    const uint64_t oldpos = avio_tell(pb);
>      unsigned int (*get)(AVIOContext*) = avio_rb16;
> +    AVIOContext *dynbuf;
>  
> -    dst[0] = 0;
> -    if (taglen < 1)
> -        return;
> +    if (!dst)
> +        return 0;

This should be an assert or not needed at all.

>  
> -    taglen--; /* account for encoding type byte */
> +    if (avio_open_dyn_buf(&dynbuf)) {
> +        av_log(s, AV_LOG_ERROR, "Error opening memory stream\n");
> +        return 0;
> +    }
>  

The error is not reported in any way and dst remains uninitialised, so
the caller accessing it will fail spectacularly.
I'd appreciate it if this function returned the error code in some way.

> -    switch (avio_r8(pb)) { /* encoding type */
> +    switch (type) { /* encoding type */
>  
>      case ID3v2_ENCODING_ISO8859:
> -        q = dst;
> -        while (taglen-- && q - dst < dstlen - 7) {
> -            uint8_t tmp;
> -            PUT_UTF8(avio_r8(pb), tmp, *q++ = tmp;)
> +        while (!seeknull && taglen-- || seeknull && ch) {

Some parentheses here would improve readability. Same below.

> +            ch = avio_r8(pb);
> +            PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
>          }
> -        *q = 0;
>          break;
>  
>      case ID3v2_ENCODING_UTF16BOM:
> @@ -93,31 +111,54 @@ static void read_ttag(AVFormatContext *s, AVIOContext 
> *pb, int taglen, const cha
>              break;
>          default:
>              av_log(s, AV_LOG_ERROR, "Incorrect BOM value in tag %s.\n", key);
> -            return;
> +            return 2;

You're leaking dynbuf here.

>          }
>          // fall-through
>  
>      case ID3v2_ENCODING_UTF16BE:
> -        q = dst;
> -        while (taglen > 1 && q - dst < dstlen - 7) {
> -            uint32_t ch;
> -            uint8_t tmp;
> -
> -            GET_UTF16(ch, ((taglen -= 2) >= 0 ? get(pb) : 0), break;)
> -            PUT_UTF8(ch, tmp, *q++ = tmp;)
> +        while (!seeknull && taglen > 1 || seeknull && ch) {
> +            GET_UTF16(ch, (seeknull || (taglen -= 2) >= 0 ? get(pb) : 0), 
> break;)
> +            PUT_UTF8(ch, tmp, avio_w8(dynbuf, tmp);)
>          }
> -        *q = 0;
>          break;
>  
>      case ID3v2_ENCODING_UTF8:
> -        len = FFMIN(taglen, dstlen);
> -        avio_read(pb, dst, len);
> -        dst[len] = 0;
> +        while (!seeknull && taglen-- || seeknull && ch) {
> +            ch = avio_r8(pb);
> +            avio_w8(dynbuf, ch);
> +        }
>          break;
>      default:
>          av_log(s, AV_LOG_WARNING, "Unknown encoding in tag %s.\n", key);
>      }
>  
> +    if (ch)
> +        avio_w8(dynbuf, 0);
> +
> +    len = avio_close_dyn_buf(dynbuf, (uint8_t **)dst);

Why not make dst uint8_t** and avoid the cast?

> +    if (dstlen)
> +        *dstlen = len;
> +
> +    return avio_tell(pb) - oldpos;
> +}
> +
> +/**
> + * Parse a text tag.
> + */
> +static void read_ttag(AVFormatContext *s, AVIOContext *pb, int taglen, const 
> char *key)
> +{
> +    char *dst;
> +    const char *val = NULL;
> +    int len, dstlen;
> +    unsigned genre;
> +
> +    if (taglen < 1)
> +        return;
> +
> +    taglen--; /* account for encoding type byte */
> +
> +    decode_str(s, pb, avio_r8(pb), &dst, &dstlen, taglen, key);
> +
>      if (!(strcmp(key, "TCON") && strcmp(key, "TCO"))
>          && (sscanf(dst, "(%d)", &genre) == 1 || sscanf(dst, "%d", &genre) == 
> 1)
>          && genre <= ID3v1_GENRE_MAX)
> @@ -134,6 +175,75 @@ static void read_ttag(AVFormatContext *s, AVIOContext 
> *pb, int taglen, const cha
>  
>      if (val)
>          av_dict_set(&s->metadata, key, val, AV_DICT_DONT_OVERWRITE);
> +
> +    av_free(dst);
> +}
> +
> +/**
> + * Parse GEOB tag into a ID3v2ExtraMetaGEOB struct.
> + */
> +static void read_geobtag(AVFormatContext *s, AVIOContext *pb, int taglen, 
> char *tag, ID3v2ExtraMeta **extra_meta)
> +{
> +    ID3v2ExtraMetaGEOB *geob_data;
> +    ID3v2ExtraMeta *new_extra;
> +    char encoding;
> +    unsigned int len;
> +
> +    if (taglen < 1)
> +        return;
> +
> +    geob_data = av_mallocz(sizeof(ID3v2ExtraMetaGEOB));
> +    if (!geob_data) {
> +        av_log(s, AV_LOG_ERROR, "Failed to alloc %"PRIuPTR" bytes\n", 
> sizeof(ID3v2ExtraMetaGEOB));
> +        return;
> +    }
> +
> +    new_extra = av_mallocz(sizeof(ID3v2ExtraMeta));
> +    if (!new_extra) {
> +        av_log(s, AV_LOG_ERROR, "Failed to alloc %"PRIuPTR" bytes\n", 
> sizeof(ID3v2ExtraMeta));
> +        return;
> +    }
> +
> +    /* read encoding type byte */
> +    encoding = avio_r8(pb);
> +    taglen--;
> +
> +    /* read MIME type (always ISO-8859) and save as UTF-8 */
> +    taglen -= decode_str(s, pb, ID3v2_ENCODING_ISO8859, 
> &geob_data->mime_type, NULL, -1, "GEOB");
> +
> +    /* read file name and save as UTF-8 */
> +    taglen -= decode_str(s, pb, encoding, &geob_data->file_name, NULL, -1, 
> "GEOB");
> +
> +    /* read content description and save as UTF-8 */
> +    taglen -= decode_str(s, pb, encoding, &geob_data->description, NULL, -1, 
> "GEOB");
> +
> +    /* save encapsulated binary data */
> +    geob_data->data = av_malloc(taglen);

taglen might be negative at this point. It might be better to make
decode_str() able to 'read until first NULL, but no more that x bytes'

> +    if (!geob_data->data) {
> +        av_log(s, AV_LOG_ERROR, "Failed to alloc %d bytes\n", taglen);
> +        return;

You're leaking geob_data and its contents.

> +    }
> +    if ((len = avio_read(pb, geob_data->data, taglen)) < taglen)
> +        av_log(s, AV_LOG_WARNING, "Error reading GEOB frame, data 
> truncated.\n");
> +    geob_data->datasize = len;
> +
> +    /* add data to the list */
> +    new_extra->tag = "GEOB";
> +    new_extra->data = geob_data;
> +    new_extra->next = *extra_meta;
> +    *extra_meta = new_extra;
> +}
> +
> +/**
> + * Free GEOB type extra metadata.
> + */
> +static void free_geobtag(ID3v2ExtraMetaGEOB *geob)
> +{
> +    av_free(geob->mime_type);
> +    av_free(geob->file_name);
> +    av_free(geob->description);
> +    av_free(geob->data);
> +    av_free(geob);
>  }
>  
>  static int is_number(const char *str)
> @@ -182,7 +292,28 @@ finish:
>          av_dict_set(m, "date", date, 0);
>  }
>  
> -static void ff_id3v2_parse(AVFormatContext *s, int len, uint8_t version, 
> uint8_t flags)
> +/**
> + * Get the corresponding function to parse a certain tag type or free the 
> parsed data.
> + * @param isv34 Determines if v2.2 or v2.3/4 strings are used
> + * @param read Determines if the function to read or to free data is returned
> + * @return If read is non-zero, a pointer to the parse function is returned. 
> If read is zero a pointer to the free function is returned. If no function 
> for the tag could be found, NULL is returned.
> + */
> +static void *get_extra_meta_func(const char *tag, int isv34, int read)
> +{
> +#define funcs ff_id3v2_extra_meta_funcs

Wouldn't it be better to just return the pointer to ID3v2EMFunc?
Also I don't particularly like this #define.

Overall nice work. I like how you use the dynbuf API.

-- 
Anton Khirnov
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to