This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit ec3f822acd6e6d2013936498af218f3c551d0310
Author: Ali Alsuliman <[email protected]>
AuthorDate: Wed Jan 7 16:49:43 2026 -0800

    [NO ISSUE][RT] Truncate warning message to writeUTF limit
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    When serializing the warnings, truncate the message to
    the UTF-8 limit of 65535 since DataOutput.writeUTF()
    has a hard limit.
    
    Ext-ref: MB-69956
    
    Change-Id: Ic18a027bc88da8813c6f58c40f6eef236fdb1d14
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20769
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Ali Alsuliman <[email protected]>
    Reviewed-by: Ian Maxon <[email protected]>
---
 .../org/apache/hyracks/api/exceptions/Warning.java |  3 +-
 .../hyracks/api/util/JavaSerializationUtils.java   | 37 ++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java
index 5f01559d03..8705f368a8 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/Warning.java
@@ -26,6 +26,7 @@ import java.util.Objects;
 
 import org.apache.commons.lang3.SerializationUtils;
 import org.apache.hyracks.api.util.ErrorMessageUtil;
+import org.apache.hyracks.api.util.JavaSerializationUtils;
 
 public class Warning implements Serializable {
 
@@ -85,7 +86,7 @@ public class Warning implements Serializable {
     public void writeFields(DataOutput output) throws IOException {
         output.writeUTF(component);
         output.writeInt(code);
-        output.writeUTF(message);
+        JavaSerializationUtils.writeTruncatedUTF(output, message);
         SourceLocation.writeFields(srcLocation, output);
         writeParams(output, params);
     }
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java
index 8e2420498e..90a82dbd6f 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/util/JavaSerializationUtils.java
@@ -20,6 +20,7 @@ package org.apache.hyracks.api.util;
 
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.DataOutput;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.ObjectInputStream;
@@ -34,10 +35,46 @@ import org.apache.hyracks.api.comm.IJavaSerializationProvider;
 
 public class JavaSerializationUtils {
     private static IJavaSerializationProvider serProvider = 
DefaultJavaSerializationProvider.INSTANCE;
+    private static final int MAX_UTF_BYTES = 65535;
+    private static final int SAFE_CHAR_LIMIT = MAX_UTF_BYTES / 3;
 
     private JavaSerializationUtils() {
     }
 
+    /**
+     * Writes {@code s} to {@code out} through {@link DataOutput#writeUTF(String)}, truncating it first when its
+     * modified UTF-8 encoding would exceed the 65535-byte limit of that method.
+     * <p>
+     * Meant for callers whose strings are usually short: a string of at most {@code SAFE_CHAR_LIMIT} chars is
+     * written directly without measuring its encoded size, since each char takes at most 3 bytes.
+     *
+     * @param out the output to write to
+     * @param s the string to write
+     * @throws IOException if writing to {@code out} fails
+     */
+    public static void writeTruncatedUTF(DataOutput out, String s) throws IOException {
+        if (s.length() > SAFE_CHAR_LIMIT) {
+            // too long for the cheap worst-case bound; measure the encoding and cut only if needed
+            truncateToUTFLimit(out, s);
+            return;
+        }
+        out.writeUTF(s);
+    }
+
+    private static void truncateToUTFLimit(DataOutput out, String s) throws 
IOException {
+        int modifiedUtf8Len = 0;
+        int i = 0;
+        while (i < s.length()) {
+            int c = s.charAt(i);
+            int bytes;
+            if (c >= 0x0001 && c <= 0x007F) {
+                bytes = 1;
+            } else if (c <= 0x07FF) {
+                bytes = 2;
+            } else {
+                bytes = 3;
+            }
+            modifiedUtf8Len += bytes;
+            if (modifiedUtf8Len > MAX_UTF_BYTES) {
+                break;
+            }
+            i++;
+        }
+        out.writeUTF(i == s.length() ? s : s.substring(0, i));
+    }
+
     public static byte[] serialize(Serializable jobSpec) throws IOException {
         if (jobSpec instanceof byte[]) {
             return (byte[]) jobSpec;

Reply via email to