This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-mime4j.git

commit 13597e44deb1e5870e0615af56a71254f14e6690
Author: Benoit TELLIER <[email protected]>
AuthorDate: Sun Mar 8 22:59:02 2026 +0100

    [PERF] Reduce String allocs in RawFieldParser
---
 .../apache/james/mime4j/stream/RawFieldParser.java | 105 ++++++++++++++++-----
 1 file changed, 81 insertions(+), 24 deletions(-)

diff --git a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
index b39cad33..80eb52b2 100644
--- a/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
+++ b/core/src/main/java/org/apache/james/mime4j/stream/RawFieldParser.java
@@ -43,6 +43,12 @@ import org.apache.james.mime4j.util.ContentUtil;
  */
 public class RawFieldParser {
 
+    // Reused per-thread byte accumulation buffer for non-ASCII / quoted-string content.
+    // Safe because copyContent, copyUnquotedContent and copyQuotedContent are leaf methods
+    // that never call each other, so the buffer is never accessed re-entrantly.
+    private static final ThreadLocal<ByteArrayBuffer> DECODE_BUFFER =
+            ThreadLocal.withInitial(() -> new ByteArrayBuffer(256));
+
     public static BitSet INIT_BITSET(int ... b) {
         BitSet bitset = new BitSet(b.length);
         for (int aB : b) {
@@ -323,22 +329,43 @@ public class RawFieldParser {
      */
     public void copyContent(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters,
             final StringBuilder dst) {
-        ByteArrayBuffer dstRaw = new ByteArrayBuffer(80);
         int pos = cursor.getPos();
-        int indexFrom = cursor.getPos();
+        int indexFrom = pos;
         int indexTo = cursor.getUpperBound();
+        int dstStart = dst.length();
         for (int i = indexFrom; i < indexTo; i++) {
-            char current = (char) (buf.byteAt(i) & 0xff);
+            byte bVal = buf.byteAt(i);
+            char current = (char) (bVal & 0xff);
             if ((delimiters != null && delimiters.get(current))
                     || CharsetUtil.isWhitespace(current) || current == '(') {
                 break;
-            } else {
-                pos++;
-                dstRaw.append(current);
             }
+            pos++;
+            if (bVal < 0) {
+                // Non-ASCII byte: undo the ASCII chars written so far, redo via UTF-8 decode
+                dst.setLength(dstStart);
+                ByteArrayBuffer dstRaw = DECODE_BUFFER.get();
+                dstRaw.clear();
+                for (int j = indexFrom; j <= i; j++) {
+                    dstRaw.append(buf.byteAt(j));
+                }
+                for (int k = i + 1; k < indexTo; k++) {
+                    byte bk = buf.byteAt(k);
+                    char ck = (char) (bk & 0xff);
+                    if ((delimiters != null && delimiters.get(ck))
+                            || CharsetUtil.isWhitespace(ck) || ck == '(') {
+                        break;
+                    }
+                    pos++;
+                    dstRaw.append(bk);
+                }
+                cursor.updatePos(pos);
+                dst.append(ContentUtil.decode(StandardCharsets.UTF_8, dstRaw));
+                return;
+            }
+            dst.append(current);
         }
         cursor.updatePos(pos);
-        dst.append(ContentUtil.decode(StandardCharsets.UTF_8, dstRaw));
     }
 
     /**
@@ -354,24 +381,41 @@ public class RawFieldParser {
     public void copyUnquotedContent(final ByteSequence buf, final ParserCursor cursor, final BitSet delimiters,
                             final StringBuilder dst) {
         int pos = cursor.getPos();
-        int indexFrom = cursor.getPos();
+        int indexFrom = pos;
         int indexTo = cursor.getUpperBound();
-
-        ByteArrayBuffer dstRaw = new ByteArrayBuffer(indexTo - indexFrom);
-
+        int dstStart = dst.length();
         for (int i = indexFrom; i < indexTo; i++) {
-            byte currentByte = buf.byteAt(i);
-            char current = (char) (currentByte & 0xff);
+            byte bVal = buf.byteAt(i);
+            char current = (char) (bVal & 0xff);
             if ((delimiters != null && delimiters.get(current))
                     || CharsetUtil.isWhitespace(current) || current == '(' || current == '\"') {
                 break;
-            } else {
-                pos++;
-                dstRaw.append(currentByte);
             }
+            pos++;
+            if (bVal < 0) {
+                // Non-ASCII byte: undo the ASCII chars written so far, redo via UTF-8 decode
+                dst.setLength(dstStart);
+                ByteArrayBuffer dstRaw = DECODE_BUFFER.get();
+                dstRaw.clear();
+                for (int j = indexFrom; j <= i; j++) {
+                    dstRaw.append(buf.byteAt(j));
+                }
+                for (int k = i + 1; k < indexTo; k++) {
+                    byte bk = buf.byteAt(k);
+                    char ck = (char) (bk & 0xff);
+                    if ((delimiters != null && delimiters.get(ck))
+                            || CharsetUtil.isWhitespace(ck) || ck == '(' || ck == '\"') {
+                        break;
+                    }
+                    pos++;
+                    dstRaw.append(bk);
+                }
+                cursor.updatePos(pos);
+                dst.append(ContentUtil.decode(StandardCharsets.UTF_8, dstRaw));
+                return;
+            }
+            dst.append(current);
         }
-        String decoded = CharsetUtil.isASCII(dstRaw) ? ContentUtil.decode(dstRaw) : ContentUtil.decode(StandardCharsets.UTF_8, dstRaw);
-        dst.append(decoded);
         cursor.updatePos(pos);
     }
 
@@ -397,7 +441,8 @@ public class RawFieldParser {
         pos++;
         indexFrom++;
 
-        ByteArrayBuffer dstRaw = new ByteArrayBuffer(indexTo - indexFrom);
+        ByteArrayBuffer dstRaw = DECODE_BUFFER.get();
+        dstRaw.clear();
 
         boolean escaped = false;
         for (int i = indexFrom; i < indexTo; i++, pos++) {
@@ -422,13 +467,25 @@ public class RawFieldParser {
             }
         }
 
-        String decoded = CharsetUtil.isASCII(dstRaw) ? ContentUtil.decode(dstRaw) : ContentUtil.decode(StandardCharsets.UTF_8, dstRaw);
-        if (decoded.startsWith("=?")) {
-            decoded = DecoderUtil.decodeEncodedWords(decoded, DecodeMonitor.SILENT);
+        if (CharsetUtil.isASCII(dstRaw)) {
+            // Check for encoded word directly on bytes — avoids an intermediate String
+            if (dstRaw.length() > 1 && dstRaw.byteAt(0) == '=' && dstRaw.byteAt(1) == '?') {
+                String raw = ContentUtil.decode(dstRaw);
+                dst.append(DecoderUtil.decodeEncodedWords(raw, DecodeMonitor.SILENT));
+            } else {
+                // Pure ASCII, not encoded: append bytes as chars — no String allocation
+                for (int i = 0; i < dstRaw.length(); i++) {
+                    dst.append((char) (dstRaw.byteAt(i) & 0xff));
+                }
+            }
+        } else {
+            String decoded = ContentUtil.decode(StandardCharsets.UTF_8, dstRaw);
+            if (decoded.startsWith("=?")) {
+                decoded = DecoderUtil.decodeEncodedWords(decoded, DecodeMonitor.SILENT);
+            }
+            dst.append(decoded);
         }
 
-        dst.append(decoded);
-
         cursor.updatePos(pos);
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to