This is an automated email from the ASF dual-hosted git repository.

vivekrai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/gobblin.git


The following commit(s) were added to refs/heads/master by this push:
     new 3617c8f7d7 Revert "[GOBBLIN-2223] Optimise writing of serialised Work Unit to File syste…" (#4138)
3617c8f7d7 is described below

commit 3617c8f7d75ed1f8ad55842b983f77f2113ae393
Author: thisisArjit <[email protected]>
AuthorDate: Thu Sep 4 10:06:01 2025 +0530

    Revert "[GOBBLIN-2223] Optimise writing of serialised Work Unit to File syste…" (#4138)
    
    This reverts commit 88555c24cc99a3de07e8fabae7475b6878560e34.
---
 .../gobblin/compat/hadoop/TextSerializer.java      | 22 ++++++++--------------
 .../apache/gobblin/compat/TextSerializerTest.java  | 20 --------------------
 2 files changed, 8 insertions(+), 34 deletions(-)

diff --git a/gobblin-api/src/main/java/org/apache/gobblin/compat/hadoop/TextSerializer.java b/gobblin-api/src/main/java/org/apache/gobblin/compat/hadoop/TextSerializer.java
index 14ecce1486..a0939d68b9 100644
--- a/gobblin-api/src/main/java/org/apache/gobblin/compat/hadoop/TextSerializer.java
+++ b/gobblin-api/src/main/java/org/apache/gobblin/compat/hadoop/TextSerializer.java
@@ -19,6 +19,7 @@ package org.apache.gobblin.compat.hadoop;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 
 
 /**
@@ -30,27 +31,20 @@ public class TextSerializer {
    * Serialize a String using the same logic as a Hadoop Text object
    */
   public static void writeStringAsText(DataOutput stream, String str) throws IOException {
-    // TODO: Use writeChars instead of writeBytes to support unicode
-    for (int i = 0; i < str.length(); i++) {
-      if (str.charAt(i) > 0x7F) {
-        throw new IllegalArgumentException("Non-ASCII character detected.");
-      }
-    }
-    writeVLong(stream, str.length());
-    stream.writeBytes(str);
+    byte[] utf8Encoded = str.getBytes(StandardCharsets.UTF_8);
+    writeVLong(stream, utf8Encoded.length);
+    stream.write(utf8Encoded);
   }
 
   /**
    * Deserialize a Hadoop Text object into a String
    */
   public static String readTextAsString(DataInput in) throws IOException {
-    int bufLen = (int) readVLong(in);
-    StringBuilder sb = new StringBuilder();
+    int bufLen = (int)readVLong(in);
+    byte[] buf = new byte[bufLen];
+    in.readFully(buf);
 
-    for (int i = 0; i < bufLen; i++) {
-      sb.append((char) in.readByte());
-    }
-    return sb.toString();
+    return new String(buf, StandardCharsets.UTF_8);
   }
 
   /**
diff --git a/gobblin-api/src/test/java/org/apache/gobblin/compat/TextSerializerTest.java b/gobblin-api/src/test/java/org/apache/gobblin/compat/TextSerializerTest.java
index 30906c9222..04ba79f9b6 100644
--- a/gobblin-api/src/test/java/org/apache/gobblin/compat/TextSerializerTest.java
+++ b/gobblin-api/src/test/java/org/apache/gobblin/compat/TextSerializerTest.java
@@ -33,26 +33,6 @@ import org.apache.gobblin.compat.hadoop.TextSerializer;
 
 public class TextSerializerTest {
   private static final String[] textsToSerialize = new String[]{"abracadabra", Strings.repeat("longString", 128000)};
-  private static final String[] serializationErrorText = new String[]{".߸´ˇ", Strings.repeat("ˀ.¸¯.", 128000)};
-
-  @Test
-  public void testSerializeError() throws IOException {
-    // Use our serializer, verify Hadoop deserializer can read it back
-    for (String textToSerialize : serializationErrorText) {
-      ByteArrayOutputStream bOs = new ByteArrayOutputStream();
-      DataOutputStream dataOutputStream = new DataOutputStream(bOs);
-
-      try {
-        TextSerializer.writeStringAsText(dataOutputStream, textToSerialize);
-        Assert.fail("Expected IOException not thrown");
-      } catch (Exception e) {
-        Assert.assertTrue(e instanceof IllegalArgumentException);
-        // Expected exception
-      } finally {
-        dataOutputStream.close();
-      }
-    }
-  }
 
   @Test
   public void testSerialize()

Reply via email to