[FFmpeg-devel] [PATCH] lavc/movtextdec: fix incorrect offset calculation for UTF-8 characters

2017-03-07 Thread Erik Bråthen Solem
The 3GPP Timed Text (TTXT / tx3g / mov_text) specification counts a multibyte 
UTF-8 character as a single character, whereas ffmpeg currently counts bytes. 
This patch inserts an if test such that:
1. continuation bytes are not counted during decoding
2. style boxes will not split these characters

Fixes trac #6021 (decoding part).

---
 libavcodec/movtextdec.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libavcodec/movtextdec.c b/libavcodec/movtextdec.c
index 6de1500..2c7a204 100644
--- a/libavcodec/movtextdec.c
+++ b/libavcodec/movtextdec.c
@@ -342,6 +342,7 @@ static int text_to_ass(AVBPrint *buf, const char *text, 
const char *text_end,
 }
 
 while (text < text_end) {
+if ((*text & 0xC0) != 0x80) { /* Boxes never split multibyte chars */
 if (m->box_flags & STYL_BOX) {
 for (i = 0; i < m->style_entries; i++) {
 if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
@@ -387,6 +388,8 @@ static int text_to_ass(AVBPrint *buf, const char *text, 
const char *text_end,
 }
 }
 }
+text_pos++;
+}
 
 switch (*text) {
 case '\r':
@@ -399,7 +402,6 @@ static int text_to_ass(AVBPrint *buf, const char *text, 
const char *text_end,
 break;
 }
 text++;
-text_pos++;
 }
 
 return 0;
-- 
1.9.5 (Apple Git-50.3)

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] lavc/movtextenc: fix incorrect offset calculation for UTF-8 characters

2017-03-07 Thread Erik Bråthen Solem
The 3GPP Timed Text (TTXT / tx3g / mov_text) specification counts a multibyte 
UTF-8 character as a single character, whereas ffmpeg currently counts bytes. 
This produces files where style boxes have incorrect offsets. This patch introduces:
1. a separate variable that keeps track of the byte count
2. a for loop that excludes continuation bytes from the character counting

Fixes trac #6021 (encoding part).

---
 libavcodec/movtextenc.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
index 20e01e2..8d09ff4 100644
--- a/libavcodec/movtextenc.c
+++ b/libavcodec/movtextenc.c
@@ -70,6 +70,7 @@ typedef struct {
 uint8_t style_fontsize;
 uint32_t style_color;
 uint16_t text_pos;
+uint16_t byte_size;
 } MovTextContext;
 
 typedef struct {
@@ -302,7 +303,10 @@ static void mov_text_text_cb(void *priv, const char *text, 
int len)
 {
 MovTextContext *s = priv;
 av_bprint_append_data(&s->buffer, text, len);
-s->text_pos += len;
+for (int i = 0; i < len; i++)
+if ((text[i] & 0xC0) != 0x80)
+s->text_pos++; /* increase character count */
+s->byte_size += len; /* increase byte count */
 }
 
 static void mov_text_new_line_cb(void *priv, int forced)
@@ -310,6 +314,7 @@ static void mov_text_new_line_cb(void *priv, int forced)
 MovTextContext *s = priv;
 av_bprint_append_data(&s->buffer, "\n", 1);
 s->text_pos += 1;
+s->byte_size += 1;
 }
 
 static const ASSCodesCallbacks mov_text_callbacks = {
@@ -328,6 +333,7 @@ static int mov_text_encode_frame(AVCodecContext *avctx, 
unsigned char *buf,
 size_t j;
 
 s->text_pos = 0;
+s->byte_size = 0;
 s->count = 0;
 s->box_flags = 0;
 s->style_entries = 0;
@@ -362,7 +368,7 @@ static int mov_text_encode_frame(AVCodecContext *avctx, 
unsigned char *buf,
 }
 }
 
-AV_WB16(buf, s->text_pos);
+AV_WB16(buf, s->byte_size);
 buf += 2;
 
 if (!av_bprint_is_complete(&s->buffer)) {
-- 
1.9.5 (Apple Git-50.3)

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/1] Fixing 3GPP Timed Text (TTXT / tx3g / mov_text) encoding for UTF-8 (ticket 6021)

2016-12-18 Thread Erik Bråthen Solem
Accidental duplicate of patch 1818.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [FFmpeg-devel, 1/1] libavcodec/movtextdec.c: fixing decoding for UTF-8 (ticket 6021)

2016-12-18 Thread Erik Bråthen Solem
Done. It was assigned its own patch number (1860), so I am changing the state
of this one to "Superseded".
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/1] Updated version of patch 1840 (ticket 6021)

2016-12-18 Thread Erik Bråthen Solem
Between testing and patch generation a character was deleted by mistake, which
broke the patch. This updated version fixes this.

Original patch description:
Character offsets were interpreted as byte offsets, resulting in misplaced
styling tags where multibyte characters were involved. The entire subtitle
stream would even be rendered invalid if such a misplaced tag happened to
split a multibyte character. This patch fixes this for UTF-8; UTF-16 was and
still is broken. These are the only supported encodings according to the spec.
---
 libavcodec/movtextdec.c | 95 +++--
 1 file changed, 53 insertions(+), 42 deletions(-)

diff --git a/libavcodec/movtextdec.c b/libavcodec/movtextdec.c
index 7b5b161..4cffacf 100644
--- a/libavcodec/movtextdec.c
+++ b/libavcodec/movtextdec.c
@@ -328,6 +328,7 @@ static int text_to_ass(AVBPrint *buf, const char *text, 
const char *text_end,
 int i = 0;
 int j = 0;
 int text_pos = 0;
+int text_pos_chars = 0;
 
 if (text < text_end && m->box_flags & TWRP_BOX) {
 if (m->w.wrap_flag == 1) {
@@ -338,50 +339,59 @@ static int text_to_ass(AVBPrint *buf, const char *text, 
const char *text_end,
 }
 
 while (text < text_end) {
-if (m->box_flags & STYL_BOX) {
-for (i = 0; i < m->style_entries; i++) {
-if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
-av_bprintf(buf, "{\\r}");
+if ((*text & 0xC0) != 0x80) { // Boxes never split multibyte characters
+if (m->box_flags & STYL_BOX) {
+for (i = 0; i < m->style_entries; i++) {
+if (m->s[i]->style_flag &&
+text_pos_chars == m->s[i]->style_end)
+{
+av_bprintf(buf, "{\\r}");
+}
 }
-}
-for (i = 0; i < m->style_entries; i++) {
-if (m->s[i]->style_flag && text_pos == m->s[i]->style_start) {
-if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
-av_bprintf(buf, "{\\b1}");
-if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
-av_bprintf(buf, "{\\i1}");
-if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
-av_bprintf(buf, "{\\u1}");
-av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
-for (j = 0; j < m->ftab_entries; j++) {
-if (m->s[i]->style_fontID == m->ftab[j]->fontID)
-av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
+for (i = 0; i < m->style_entries; i++) {
+if (m->s[i]->style_flag
+&& text_pos_chars == m->s[i]->style_start)
+{
+if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
+av_bprintf(buf, "{\\b1}");
+if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
+av_bprintf(buf, "{\\i1}");
+if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
+av_bprintf(buf, "{\\u1}");
+/* (No need to print font style if equal to default?) 
*/
+av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
+for (j = 0; j < m->ftab_entries; j++) {
+if (m->s[i]->style_fontID == m->ftab[j]->fontID)
+av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
+}
 }
 }
 }
-}
-if (m->box_flags & HLIT_BOX) {
-if (text_pos == m->h.hlit_start) {
-/* If hclr box is present, set the secondary color to the color
- * specified. Otherwise, set primary color to white and 
secondary
- * color to black. These colors will come from 
TextSampleModifier
- * boxes in future and inverse video technique for highlight 
will
- * be implemented.
- */
-if (m->box_flags & HCLR_BOX) {
-av_bprintf(buf, "{\\2c%02x%02x%02x&}", 
m->c.hlit_color[2],
-m->c.hlit_color[1], m->c.hlit_color[0]);
-} else {
-av_bprintf(buf, "{\\1c&}{\\2c&}");
+if (m->box_flags & HLIT_BOX) {
+if (text_pos_chars == m->h.hlit_start) {
+/* If hclr box is present, set the secondary color to the
+ * color specified. Otherwise, set primary color to white
+ * and secondary color to black. These colors will come 
from
+ * TextSampleModifier boxes in future and inverse video
+ * technique for highlight will be implemented.
+ */
+ 

Re: [FFmpeg-devel] [PATCH 1/1] Fixing 3GPP Timed Text (TTXT / tx3g / mov_text) encoding for UTF-8 (ticket 6021)

2016-12-18 Thread Erik Bråthen Solem
Good question. Since text_pos_chars never exceeds the existing 
variable text_pos, I did not think about this.

No, there are no checks. The spec says that "Authors should limit the
string in each text sample to not more than 2048 bytes, for maximum
terminal interoperability", but the code does not enforce this limit
(or the maximum uint16_t value of 65535 for that matter). The
likelihood of exceeding this limit is very small, but it does not hurt to
add a check. In any case text_pos >= text_pos_chars, so it should be
sufficient to check just text_pos. In mov_text_new_line_cb we only
increment by 1, so checking if s->text_pos == 0 after that is enough.
In mov_text_text_cb this check can be used instead, placed before the
length len is added to text_pos:
if (len > UINT16_MAX || (s->text_pos > UINT16_MAX - len)) // Overflow

I am new to the project's source code and do not know how errors and
warnings should be handled, but could it be an idea to print a
warning if text_pos > 2048, and print an error message and abort in
case of overflow? Or should the rest of the text just be truncated?

PS. Please excuse the duplicate patch that was sent an hour or two
ago. It is identical to the one I submitted a couple of days ago and
I have no idea why or how that happened.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


Re: [FFmpeg-devel] [PATCH 1/1] libavcodec/movtextdec.c: fixing decoding for UTF-8 (ticket 6021)

2016-12-18 Thread Erik Bråthen Solem
Yes, it was supposed to be box_types, not ox_types. I must have removed the b 
by mistake after I tested the code. Should I resubmit the patch?
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/1] Fixing 3GPP Timed Text (TTXT / tx3g / mov_text) encoding for UTF-8 (ticket 6021)

2016-12-18 Thread Erik Bråthen Solem
According to the format specification (3GPP TS 26.245, section 5.2) "storage
lengths are specified as byte-counts, wheras highlighting is specified using
character offsets." This patch replaces byte counting with character counting
for highlighting. See the following page for a link to the specification:
https://gpac.wp.mines-telecom.fr/mp4box/ttxt-format-documentation/
---
 libavcodec/movtextenc.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
index 20e01e2..3ae015a 100644
--- a/libavcodec/movtextenc.c
+++ b/libavcodec/movtextenc.c
@@ -70,6 +70,7 @@ typedef struct {
 uint8_t style_fontsize;
 uint32_t style_color;
 uint16_t text_pos;
+uint16_t text_pos_chars;
 } MovTextContext;
 
 typedef struct {
@@ -216,10 +217,10 @@ static void mov_text_style_cb(void *priv, const char 
style, int close)
 }
 
 s->style_attributes_temp->style_flag = 0;
-s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_start = 
AV_RB16(&s->text_pos_chars);
 } else {
 if (s->style_attributes_temp->style_flag) { //break the style 
record here and start a new one
-s->style_attributes_temp->style_end = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_end = 
AV_RB16(&s->text_pos_chars);
 av_dynarray_add(&s->style_attributes, &s->count, 
s->style_attributes_temp);
 s->style_attributes_temp = 
av_malloc(sizeof(*s->style_attributes_temp));
 if (!s->style_attributes_temp) {
@@ -230,10 +231,10 @@ static void mov_text_style_cb(void *priv, const char 
style, int close)
 }
 
 s->style_attributes_temp->style_flag = 
s->style_attributes[s->count - 1]->style_flag;
-s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_start = 
AV_RB16(&s->text_pos_chars);
 } else {
 s->style_attributes_temp->style_flag = 0;
-s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_start = 
AV_RB16(&s->text_pos_chars);
 }
 }
 switch (style){
@@ -248,7 +249,7 @@ static void mov_text_style_cb(void *priv, const char style, 
int close)
 break;
 }
 } else {
-s->style_attributes_temp->style_end = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_end = AV_RB16(&s->text_pos_chars);
 av_dynarray_add(&s->style_attributes, &s->count, 
s->style_attributes_temp);
 
 s->style_attributes_temp = 
av_malloc(sizeof(*s->style_attributes_temp));
@@ -273,7 +274,7 @@ static void mov_text_style_cb(void *priv, const char style, 
int close)
 break;
 }
 if (s->style_attributes_temp->style_flag) { //start of new style record
-s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_start = 
AV_RB16(&s->text_pos_chars);
 }
 }
 s->box_flags |= STYL_BOX;
@@ -284,11 +285,11 @@ static void mov_text_color_cb(void *priv, unsigned int 
color, unsigned int color
 MovTextContext *s = priv;
 if (color_id == 2) {//secondary color changes
 if (s->box_flags & HLIT_BOX) {  //close tag
-s->hlit.end = AV_RB16(&s->text_pos);
+s->hlit.end = AV_RB16(&s->text_pos_chars);
 } else {
 s->box_flags |= HCLR_BOX;
 s->box_flags |= HLIT_BOX;
-s->hlit.start = AV_RB16(&s->text_pos);
+s->hlit.start = AV_RB16(&s->text_pos_chars);
 s->hclr.color = color | (0xFF << 24);  //set alpha value to FF
 }
 }
@@ -302,7 +303,10 @@ static void mov_text_text_cb(void *priv, const char *text, 
int len)
 {
 MovTextContext *s = priv;
 av_bprint_append_data(&s->buffer, text, len);
-s->text_pos += len;
+s->text_pos += len; // length of text in bytes
+for (int i = 0; i < len; i++)   // length of text in UTF-8 characters
+if ((text[i] & 0xC0) != 0x80)
+s->text_pos_chars++;
 }
 
 static void mov_text_new_line_cb(void *priv, int forced)
@@ -310,6 +314,7 @@ static void mov_text_new_line_cb(void *priv, int forced)
 MovTextContext *s = priv;
 av_bprint_append_data(&s->buffer, "\n", 1);
 s->text_pos += 1;
+s->text_pos_chars += 1;
 }
 
 static const ASSCodesCallbacks mov_text_callbacks = {
@@ -328,6 +333,7 @@ static int mov_text_encode_frame(AVCodecContext *avctx, 
unsigned char *buf,
 size_t j;
 
 s->text_pos = 0;
+s->text_pos_chars = 0;
 s->count = 0;
 s->box_flags = 0;
 s->style_entries = 0;
-- 
1.9.5 (Apple Git-50.3)

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH 1/1] libavcodec/movtextdec.c: fixing decoding for UTF-8 (ticket 6021)

2016-12-16 Thread Erik Bråthen Solem
Character offsets were interpreted as byte offsets, resulting in misplaced
styling tags where multibyte characters were involved. The entire subtitle
stream would even be rendered invalid if such a misplaced tag happened to
split a multibyte character. This patch fixes this for UTF-8; UTF-16 was and
still is broken. These are the only supported encodings according to the spec.

---
 libavcodec/movtextdec.c | 95 +++--
 1 file changed, 53 insertions(+), 42 deletions(-)

diff --git a/libavcodec/movtextdec.c b/libavcodec/movtextdec.c
index 7b5b161..6e1ff73 100644
--- a/libavcodec/movtextdec.c
+++ b/libavcodec/movtextdec.c
@@ -328,6 +328,7 @@ static int text_to_ass(AVBPrint *buf, const char *text, 
const char *text_end,
 int i = 0;
 int j = 0;
 int text_pos = 0;
+int text_pos_chars = 0;
 
 if (text < text_end && m->box_flags & TWRP_BOX) {
 if (m->w.wrap_flag == 1) {
@@ -338,50 +339,59 @@ static int text_to_ass(AVBPrint *buf, const char *text, 
const char *text_end,
 }
 
 while (text < text_end) {
-if (m->box_flags & STYL_BOX) {
-for (i = 0; i < m->style_entries; i++) {
-if (m->s[i]->style_flag && text_pos == m->s[i]->style_end) {
-av_bprintf(buf, "{\\r}");
+if ((*text & 0xC0) != 0x80) { // Boxes never split multibyte characters
+if (m->box_flags & STYL_BOX) {
+for (i = 0; i < m->style_entries; i++) {
+if (m->s[i]->style_flag &&
+text_pos_chars == m->s[i]->style_end)
+{
+av_bprintf(buf, "{\\r}");
+}
 }
-}
-for (i = 0; i < m->style_entries; i++) {
-if (m->s[i]->style_flag && text_pos == m->s[i]->style_start) {
-if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
-av_bprintf(buf, "{\\b1}");
-if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
-av_bprintf(buf, "{\\i1}");
-if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
-av_bprintf(buf, "{\\u1}");
-av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
-for (j = 0; j < m->ftab_entries; j++) {
-if (m->s[i]->style_fontID == m->ftab[j]->fontID)
-av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
+for (i = 0; i < m->style_entries; i++) {
+if (m->s[i]->style_flag
+&& text_pos_chars == m->s[i]->style_start)
+{
+if (m->s[i]->style_flag & STYLE_FLAG_BOLD)
+av_bprintf(buf, "{\\b1}");
+if (m->s[i]->style_flag & STYLE_FLAG_ITALIC)
+av_bprintf(buf, "{\\i1}");
+if (m->s[i]->style_flag & STYLE_FLAG_UNDERLINE)
+av_bprintf(buf, "{\\u1}");
+/* (No need to print font style if equal to default?) 
*/
+av_bprintf(buf, "{\\fs%d}", m->s[i]->fontsize);
+for (j = 0; j < m->ftab_entries; j++) {
+if (m->s[i]->style_fontID == m->ftab[j]->fontID)
+av_bprintf(buf, "{\\fn%s}", m->ftab[j]->font);
+}
 }
 }
 }
-}
-if (m->box_flags & HLIT_BOX) {
-if (text_pos == m->h.hlit_start) {
-/* If hclr box is present, set the secondary color to the color
- * specified. Otherwise, set primary color to white and 
secondary
- * color to black. These colors will come from 
TextSampleModifier
- * boxes in future and inverse video technique for highlight 
will
- * be implemented.
- */
-if (m->box_flags & HCLR_BOX) {
-av_bprintf(buf, "{\\2c%02x%02x%02x&}", 
m->c.hlit_color[2],
-m->c.hlit_color[1], m->c.hlit_color[0]);
-} else {
-av_bprintf(buf, "{\\1c&}{\\2c&}");
+if (m->box_flags & HLIT_BOX) {
+if (text_pos_chars == m->h.hlit_start) {
+/* If hclr box is present, set the secondary color to the
+ * color specified. Otherwise, set primary color to white
+ * and secondary color to black. These colors will come 
from
+ * TextSampleModifier boxes in future and inverse video
+ * technique for highlight will be implemented.
+ */
+if (m->box_flags & HCLR_BOX) {
+av_bprintf(buf, "{\\2c%02x%02x%02x&}",
+m->c.hlit_color[2], 

[FFmpeg-devel] [PATCH 1/1] Fixing 3GPP Timed Text (TTXT / tx3g / mov_text) encoding for UTF-8 (ticket 6021)

2016-12-15 Thread Erik Bråthen Solem
According to the format specification (3GPP TS 26.245, section 5.2) "storage
lengths are specified as byte-counts, wheras highlighting is specified using
character offsets." This patch replaces byte counting with character counting
for highlighting. See the following page for a link to the specification:
https://gpac.wp.mines-telecom.fr/mp4box/ttxt-format-documentation/
---
 libavcodec/movtextenc.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
index 20e01e2..3ae015a 100644
--- a/libavcodec/movtextenc.c
+++ b/libavcodec/movtextenc.c
@@ -70,6 +70,7 @@ typedef struct {
 uint8_t style_fontsize;
 uint32_t style_color;
 uint16_t text_pos;
+uint16_t text_pos_chars;
 } MovTextContext;
 
 typedef struct {
@@ -216,10 +217,10 @@ static void mov_text_style_cb(void *priv, const char 
style, int close)
 }
 
 s->style_attributes_temp->style_flag = 0;
-s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_start = 
AV_RB16(&s->text_pos_chars);
 } else {
 if (s->style_attributes_temp->style_flag) { //break the style 
record here and start a new one
-s->style_attributes_temp->style_end = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_end = 
AV_RB16(&s->text_pos_chars);
 av_dynarray_add(&s->style_attributes, &s->count, 
s->style_attributes_temp);
 s->style_attributes_temp = 
av_malloc(sizeof(*s->style_attributes_temp));
 if (!s->style_attributes_temp) {
@@ -230,10 +231,10 @@ static void mov_text_style_cb(void *priv, const char 
style, int close)
 }
 
 s->style_attributes_temp->style_flag = 
s->style_attributes[s->count - 1]->style_flag;
-s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_start = 
AV_RB16(&s->text_pos_chars);
 } else {
 s->style_attributes_temp->style_flag = 0;
-s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_start = 
AV_RB16(&s->text_pos_chars);
 }
 }
 switch (style){
@@ -248,7 +249,7 @@ static void mov_text_style_cb(void *priv, const char style, 
int close)
 break;
 }
 } else {
-s->style_attributes_temp->style_end = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_end = AV_RB16(&s->text_pos_chars);
 av_dynarray_add(&s->style_attributes, &s->count, 
s->style_attributes_temp);
 
 s->style_attributes_temp = 
av_malloc(sizeof(*s->style_attributes_temp));
@@ -273,7 +274,7 @@ static void mov_text_style_cb(void *priv, const char style, 
int close)
 break;
 }
 if (s->style_attributes_temp->style_flag) { //start of new style record
-s->style_attributes_temp->style_start = AV_RB16(&s->text_pos);
+s->style_attributes_temp->style_start = 
AV_RB16(&s->text_pos_chars);
 }
 }
 s->box_flags |= STYL_BOX;
@@ -284,11 +285,11 @@ static void mov_text_color_cb(void *priv, unsigned int 
color, unsigned int color
 MovTextContext *s = priv;
 if (color_id == 2) {//secondary color changes
 if (s->box_flags & HLIT_BOX) {  //close tag
-s->hlit.end = AV_RB16(&s->text_pos);
+s->hlit.end = AV_RB16(&s->text_pos_chars);
 } else {
 s->box_flags |= HCLR_BOX;
 s->box_flags |= HLIT_BOX;
-s->hlit.start = AV_RB16(&s->text_pos);
+s->hlit.start = AV_RB16(&s->text_pos_chars);
 s->hclr.color = color | (0xFF << 24);  //set alpha value to FF
 }
 }
@@ -302,7 +303,10 @@ static void mov_text_text_cb(void *priv, const char *text, 
int len)
 {
 MovTextContext *s = priv;
 av_bprint_append_data(&s->buffer, text, len);
-s->text_pos += len;
+s->text_pos += len; // length of text in bytes
+for (int i = 0; i < len; i++)   // length of text in UTF-8 characters
+if ((text[i] & 0xC0) != 0x80)
+s->text_pos_chars++;
 }
 
 static void mov_text_new_line_cb(void *priv, int forced)
@@ -310,6 +314,7 @@ static void mov_text_new_line_cb(void *priv, int forced)
 MovTextContext *s = priv;
 av_bprint_append_data(&s->buffer, "\n", 1);
 s->text_pos += 1;
+s->text_pos_chars += 1;
 }
 
 static const ASSCodesCallbacks mov_text_callbacks = {
@@ -328,6 +333,7 @@ static int mov_text_encode_frame(AVCodecContext *avctx, 
unsigned char *buf,
 size_t j;
 
 s->text_pos = 0;
+s->text_pos_chars = 0;
 s->count = 0;
 s->box_flags = 0;
 s->style_entries = 0;
-- 
1.9.5 (Apple Git-50.3)

___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel