This is an automated email from the ASF dual-hosted git repository.

linkinstar pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/answer.git


The following commit(s) were added to refs/heads/dev by this push:
     new ce053ccf fix: multi byte run boundary for cut long title
ce053ccf is described below

commit ce053ccfa6620cc4c8263ba3b54dfcd988a1e6e9
Author: ferhat elmas <[email protected]>
AuthorDate: Tue Nov 25 00:34:24 2025 +0100

    fix: multi byte run boundary for cut long title
    
    Signed-off-by: ferhat elmas <[email protected]>
---
 pkg/htmltext/htmltext.go      | 12 +++++++++---
 pkg/htmltext/htmltext_test.go | 22 ++++++++++++++++++++++
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/pkg/htmltext/htmltext.go b/pkg/htmltext/htmltext.go
index 0b84cda4..707c20a8 100644
--- a/pkg/htmltext/htmltext.go
+++ b/pkg/htmltext/htmltext.go
@@ -96,10 +96,16 @@ func convertChinese(content string) string {
 }
 
 func cutLongTitle(title string) string {
-       if len(title) > 150 {
-               return title[0:150]
+       maxBytes := 150
+       if len(title) <= maxBytes {
+               return title
        }
-       return title
+
+       truncated := title[:maxBytes]
+       for len(truncated) > 0 && !utf8.ValidString(truncated) {
+               truncated = truncated[:len(truncated)-1]
+       }
+       return truncated
 }
 
 // FetchExcerpt return the excerpt from the HTML string
diff --git a/pkg/htmltext/htmltext_test.go b/pkg/htmltext/htmltext_test.go
index d549d887..63866eb2 100644
--- a/pkg/htmltext/htmltext_test.go
+++ b/pkg/htmltext/htmltext_test.go
@@ -21,6 +21,7 @@ package htmltext
 
 import (
        "fmt"
+       "strings"
        "testing"
 
        "github.com/stretchr/testify/assert"
@@ -178,6 +179,27 @@ func TestFetchRangedExcerpt(t *testing.T) {
        assert.Equal(t, expected, actual)
 }
 
+func TestCutLongTitle(t *testing.T) {
+       // Short title, no cutting needed
+       short := "hello"
+       assert.Equal(t, short, cutLongTitle(short))
+
+       // Exactly max bytes, no cutting needed
+       exact150 := strings.Repeat("a", 150)
+       assert.Equal(t, 150, len(cutLongTitle(exact150)))
+
+       // Just over max bytes, should be cut
+       exact151 := strings.Repeat("a", 151)
+       assert.Equal(t, 150, len(cutLongTitle(exact151)))
+
+       // Multi-byte rune at boundary gets removed properly
+       asciiPart := strings.Repeat("a", 149) // 149 bytes
+       multiByteChar := "中"                  // 3 bytes - will span bytes 149-151
+       title := asciiPart + multiByteChar    // 152 bytes total
+
+       assert.Equal(t, asciiPart, cutLongTitle(title))
+}
+
 func TestFetchMatchedExcerpt(t *testing.T) {
        var (
                expected,

Reply via email to