PR #23064 opened by Jun Zhao (mypopydev)
URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23064
Patch URL: https://code.ffmpeg.org/FFmpeg/FFmpeg/pulls/23064.patch
From 3f6727191b233afc288f3b273a7227ed0bc59ee4 Mon Sep 17 00:00:00 2001 From: Jun Zhao <[email protected]> Date: Sun, 10 May 2026 20:17:39 +0800 Subject: [PATCH 1/2] lavfi/vf_drawtext: fix HarfBuzz shaping of Bengali / Indic scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit shape_text_hb() set HB_SCRIPT_LATIN and called hb_buffer_guess_segment_properties() on an empty buffer, so the inference was a no-op. Bengali and other Indic / USE scripts reached the default OT shaper instead of their script-specific shaper, leaving the virama visible and consonants disjointed (e.g. স্টারমার rendered as স্ টারমার). Add the UTF-8 text first, then guess segment properties so the script and direction come from the actual Unicode contents. Fixes: https://code.ffmpeg.org/FFmpeg/FFmpeg/issues/23014 Signed-off-by: Jun Zhao <[email protected]> --- libavfilter/vf_drawtext.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c index 005e150de7..de0bc07f89 100644 --- a/libavfilter/vf_drawtext.c +++ b/libavfilter/vf_drawtext.c @@ -1362,18 +1362,19 @@ static int draw_glyphs(AVFilterContext *ctx, AVFrame *frame, static int shape_text_hb(DrawTextContext *s, HarfbuzzData* hb, const char* text, int textLen) { hb->buf = hb_buffer_create(); - if(!hb_buffer_allocation_successful(hb->buf)) { - return AVERROR(ENOMEM); - } - hb_buffer_set_direction(hb->buf, HB_DIRECTION_LTR); - hb_buffer_set_script(hb->buf, HB_SCRIPT_LATIN); - hb_buffer_set_language(hb->buf, hb_language_from_string("en", -1)); - hb_buffer_guess_segment_properties(hb->buf); - hb->font = hb_ft_font_create_referenced(s->face); - if(hb->font == NULL) { + if (!hb_buffer_allocation_successful(hb->buf)) { return AVERROR(ENOMEM); } hb_buffer_add_utf8(hb->buf, text, textLen, 0, -1); + /* Let HarfBuzz infer script and direction from the buffer's Unicode + * contents so complex scripts (Bengali / Indic / USE) 
are dispatched + * to the correct shaper. The previous unconditional HB_SCRIPT_LATIN + * setting forced the default OT shaper and broke conjunct formation. */ + hb_buffer_guess_segment_properties(hb->buf); + hb->font = hb_ft_font_create_referenced(s->face); + if (hb->font == NULL) { + return AVERROR(ENOMEM); + } hb_shape(hb->font, hb->buf, NULL, 0); hb->glyph_info = hb_buffer_get_glyph_infos(hb->buf, &hb->glyph_count); hb->glyph_pos = hb_buffer_get_glyph_positions(hb->buf, &hb->glyph_count); -- 2.52.0 From ac1aac19af4dfcd2d32cf408ae2eaecc4b00d0e0 Mon Sep 17 00:00:00 2001 From: Jun Zhao <[email protected]> Date: Sun, 10 May 2026 20:17:55 +0800 Subject: [PATCH 2/2] lavfi/vf_drawtext: log inferred shaping properties at verbose Log the script and direction picked by HarfBuzz, plus codepoint and glyph counts, so the shaper choice can be verified. Differing codepoint and glyph counts indicate reordering / ligation / decomposition. Codepoints are sampled before hb_shape(), which flips the buffer content type to GLYPHS. Signed-off-by: Jun Zhao <[email protected]> --- libavfilter/vf_drawtext.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/libavfilter/vf_drawtext.c b/libavfilter/vf_drawtext.c index de0bc07f89..726b54d69f 100644 --- a/libavfilter/vf_drawtext.c +++ b/libavfilter/vf_drawtext.c @@ -1359,8 +1359,11 @@ static int draw_glyphs(AVFilterContext *ctx, AVFrame *frame, } // Shapes a line of text using libharfbuzz -static int shape_text_hb(DrawTextContext *s, HarfbuzzData* hb, const char* text, int textLen) +static int shape_text_hb(AVFilterContext *ctx, DrawTextContext *s, + HarfbuzzData *hb, const char *text, int textLen) { + unsigned codepoints; + hb->buf = hb_buffer_create(); if (!hb_buffer_allocation_successful(hb->buf)) { return AVERROR(ENOMEM); @@ -1371,6 +1374,9 @@ static int shape_text_hb(DrawTextContext *s, HarfbuzzData* hb, const char* text, * to the correct shaper. 
The previous unconditional HB_SCRIPT_LATIN * setting forced the default OT shaper and broke conjunct formation. */ hb_buffer_guess_segment_properties(hb->buf); + /* Sample the buffer length here, before hb_shape() flips the buffer's + * content type from UNICODE to GLYPHS. */ + codepoints = hb_buffer_get_length(hb->buf); hb->font = hb_ft_font_create_referenced(s->face); if (hb->font == NULL) { return AVERROR(ENOMEM); @@ -1379,6 +1385,17 @@ static int shape_text_hb(DrawTextContext *s, HarfbuzzData* hb, const char* text, hb->glyph_info = hb_buffer_get_glyph_infos(hb->buf, &hb->glyph_count); hb->glyph_pos = hb_buffer_get_glyph_positions(hb->buf, &hb->glyph_count); + if (av_log_get_level() >= AV_LOG_VERBOSE) { + char script_tag[5] = { 0 }; + hb_script_t script = hb_buffer_get_script(hb->buf); + hb_direction_t dir = hb_buffer_get_direction(hb->buf); + hb_tag_to_string(hb_script_to_iso15924_tag(script), script_tag); + av_log(ctx, AV_LOG_VERBOSE, + "shape: script=%s direction=%s codepoints=%u glyphs=%u\n", + script_tag, hb_direction_to_string(dir), + codepoints, hb->glyph_count); + } + return 0; } @@ -1427,7 +1444,7 @@ continue_on_failed: // Evaluate the width of the space character if needed to replace tabs if (s->tab_count > 0 && !s->blank_advance64) { HarfbuzzData hb_data; - ret = shape_text_hb(s, &hb_data, " ", 1); + ret = shape_text_hb(ctx, s, &hb_data, " ", 1); if(ret != 0) { goto done; } @@ -1465,7 +1482,7 @@ continue_on_failed2: TextLine *cur_line = &s->lines[line_count]; HarfbuzzData *hb = &cur_line->hb_data; cur_line->cluster_offset = line_offset; - ret = shape_text_hb(s, hb, start, len); + ret = shape_text_hb(ctx, s, hb, start, len); if (ret != 0) { goto done; } -- 2.52.0 _______________________________________________ ffmpeg-devel mailing list -- [email protected] To unsubscribe send an email to [email protected]
