This is an automated email from the ASF dual-hosted git repository.

dockerzhang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/inlong.git


The following commit(s) were added to refs/heads/master by this push:
     new 899aeabf5b [INLONG-11369][Sort] Fix the KV split error when there is a escape char without before & and = in text (#11370)
899aeabf5b is described below

commit 899aeabf5b04648372ce1f14d22ce8b6c5845455
Author: Mingyu Bao <[email protected]>
AuthorDate: Mon Oct 21 14:11:35 2024 +0800

    [INLONG-11369][Sort] Fix the KV split error when there is a escape char without before & and = in text (#11370)
---
 .../inlong/sort/formats/util/StringUtils.java      | 90 ++++++++++++----------
 .../sort/formats/common/StringUtilsTest.java       | 21 +++--
 .../apache/inlong/sort/formats/kv/KvUtilsTest.java |  2 +-
 3 files changed, 66 insertions(+), 47 deletions(-)

diff --git a/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/util/StringUtils.java b/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/util/StringUtils.java
index 3ea6678ca1..000d7a7175 100644
--- a/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/util/StringUtils.java
+++ b/inlong-sort/sort-formats/format-common/src/main/java/org/apache/inlong/sort/formats/util/StringUtils.java
@@ -100,11 +100,17 @@ public class StringUtils {
          */
         int kvState = STATE_KEY;
 
-        char lastCh = 0;
+        char nextCh = 0;
         for (int i = 0; i < text.length(); ++i) {
             char ch = text.charAt(i);
+            if ((i + 1) < text.length()) {
+                nextCh = text.charAt(i + 1);
+            } else {
+                nextCh = 0;
+            }
             if (ch == kvDelimiter) {
                 switch (state) {
+                    // match previous kv delimiter first when there are more than one kvDelimiter
                     case STATE_KEY:
                         key = stringBuilder.toString();
                         stringBuilder.setLength(0);
@@ -124,24 +130,19 @@ public class StringUtils {
             } else if (ch == entryDelimiter) {
                 switch (state) {
                     case STATE_KEY:
-                        key = lastKey;
-                        if (lastValue == null) {
-                            value = ch + stringBuilder.toString();
-                        } else {
-                            value = lastValue + ch + stringBuilder.toString();
-                        }
-                        fields.put(key, value);
-                        lastKey = key;
-                        lastValue = value;
-                        stringBuilder.setLength(0);
+                        stringBuilder.append(ch);
                         break;
                     case STATE_VALUE:
-                        value = stringBuilder.toString();
-                        fields.put(key, value);
-                        lastKey = key;
-                        lastValue = value;
-                        stringBuilder.setLength(0);
-                        state = STATE_KEY;
+                        if (nextCh == entryDelimiter) {
+                            stringBuilder.append(ch);
+                        } else {
+                            value = stringBuilder.toString();
+                            fields.put(key, value);
+                            lastKey = key;
+                            lastValue = value;
+                            stringBuilder.setLength(0);
+                            state = STATE_KEY;
+                        }
                         break;
                     case STATE_ESCAPING:
                         stringBuilder.append(ch);
@@ -154,12 +155,6 @@ public class StringUtils {
             } else if (escapeChar != null && ch == escapeChar) {
                 switch (state) {
                     case STATE_KEY:
-                        if (lastCh != 0) {
-                            stringBuilder.append(lastCh);
-                        }
-                        kvState = state;
-                        state = STATE_ESCAPING;
-                        break;
                     case STATE_VALUE:
                         kvState = state;
                         state = STATE_ESCAPING;
@@ -175,12 +170,6 @@ public class StringUtils {
             } else if (quoteChar != null && ch == quoteChar) {
                 switch (state) {
                     case STATE_KEY:
-                        if (lastCh != 0) {
-                            stringBuilder.append(lastCh);
-                        }
-                        kvState = state;
-                        state = STATE_QUOTING;
-                        break;
                     case STATE_VALUE:
                         kvState = state;
                         state = STATE_QUOTING;
@@ -196,20 +185,26 @@ public class StringUtils {
             } else if (lineDelimiter != null && ch == lineDelimiter) {
                 switch (state) {
                     case STATE_KEY:
+                        String remainingKey = stringBuilder.toString();
                         key = lastKey;
-                        stringBuilder.append(lastValue).append(lastCh);
+                        stringBuilder.setLength(0);
+                        stringBuilder.append(lastValue).append(entryDelimiter).append(remainingKey);
                         value = stringBuilder.toString();
                         fields.put(key, value);
+                        Map<String, String> copyFields = new HashMap<>();
+                        copyFields.putAll(fields);
+                        lines.add(copyFields);
+                        stringBuilder.setLength(0);
+                        fields.clear();
                         lastKey = null;
                         lastValue = null;
-                        stringBuilder.setLength(0);
                         break;
                     case STATE_VALUE:
                         lastKey = null;
                         lastValue = null;
                         value = stringBuilder.toString();
                         fields.put(key, value);
-                        Map<String, String> copyFields = new HashMap<>();
+                        copyFields = new HashMap<>();
                         copyFields.putAll(fields);
                         lines.add(copyFields);
                         stringBuilder.setLength(0);
@@ -226,14 +221,22 @@ public class StringUtils {
                 }
             } else {
                 stringBuilder.append(ch);
+                switch (state) {
+                    case STATE_ESCAPING:
+                        state = kvState;
+                }
             }
-            lastCh = ch;
         }
 
         switch (state) {
             case STATE_KEY:
                 if (lastKey != null && lastValue != null && text != null) {
-                    fields.put(lastKey, lastValue + lastCh);
+                    String remainingKey = stringBuilder.toString();
+                    key = lastKey;
+                    stringBuilder.setLength(0);
+                    stringBuilder.append(lastValue).append(entryDelimiter).append(remainingKey);
+                    value = stringBuilder.toString();
+                    fields.put(key, value);
                 }
                 lines.add(fields);
                 return lines;
@@ -244,14 +247,19 @@ public class StringUtils {
                 return lines;
             case STATE_ESCAPING:
             case STATE_QUOTING:
-                value = stringBuilder.toString();
-                String oldValue = fields.get(key);
-                if (value != null && !"".equals(value)
-                        && oldValue != null && !"".equals(oldValue)) {
-                    fields.put(key, oldValue + value);
-                } else if (value != null && !"".equals(value)) {
-                    fields.put(key, value);
+                switch (kvState) {
+                    case STATE_VALUE:
+                        value = stringBuilder.toString();
+                        fields.put(key, value);
+                        break;
+                    case STATE_KEY:
+                        if (lastKey != null) {
+                            value = stringBuilder.toString();
+                            String oldValue = fields.get(key);
+                            fields.put(key, oldValue + entryDelimiter + value);
+                        }
                 }
+
                 lines.add(fields);
                 return lines;
             default:
diff --git a/inlong-sort/sort-formats/format-common/src/test/java/org/apache/inlong/sort/formats/common/StringUtilsTest.java b/inlong-sort/sort-formats/format-common/src/test/java/org/apache/inlong/sort/formats/common/StringUtilsTest.java
index fc64811a97..b9c88ed788 100644
--- a/inlong-sort/sort-formats/format-common/src/test/java/org/apache/inlong/sort/formats/common/StringUtilsTest.java
+++ b/inlong-sort/sort-formats/format-common/src/test/java/org/apache/inlong/sort/formats/common/StringUtilsTest.java
@@ -19,11 +19,13 @@ package org.apache.inlong.sort.formats.common;
 
 import org.apache.inlong.sort.formats.util.StringUtils;
 
+import org.junit.Assert;
 import org.junit.Test;
 
 import java.util.List;
 import java.util.Map;
 
+import static org.apache.inlong.sort.formats.util.StringUtils.splitKv;
 import static org.junit.Assert.assertEquals;
 
 public class StringUtilsTest {
@@ -55,17 +57,17 @@ public class StringUtilsTest {
                 '=', '\\', '\'', '\n');
         assertEquals("=", map4.get(0).get("name"));
         assertEquals("20&&", map4.get(0).get("age"));
-        assertEquals("=", map4.get(0).get("name1"));
-        assertEquals("20&&", map4.get(0).get("age1"));
+        assertEquals("=", map4.get(1).get("name1"));
+        assertEquals("20&&", map4.get(1).get("age1"));
 
         String kvString5 = "name==&age=20&&\nname1==&age1=20&&&value=aaa&dddd&";
         List<Map<String, String>> map5 = StringUtils.splitKv(kvString5, '&',
                 '=', '\\', '\'', '\n');
         assertEquals("=", map5.get(0).get("name"));
         assertEquals("20&&", map5.get(0).get("age"));
-        assertEquals("=", map5.get(0).get("name1"));
-        assertEquals("20&&", map5.get(0).get("age1"));
-        assertEquals("aaa&dddd&", map5.get(0).get("value"));
+        assertEquals("=", map5.get(1).get("name1"));
+        assertEquals("20&&", map5.get(1).get("age1"));
+        assertEquals("aaa&dddd&", map5.get(1).get("value"));
 
         String kvString6 = "name==&age=20&&\\";
         List<Map<String, String>> map6 = StringUtils.splitKv(kvString6, '&',
@@ -153,4 +155,13 @@ public class StringUtilsTest {
         assertEquals("home", csv1Array4[2][1]);
         assertEquals("home", csv1Array4[2][2]);
     }
+
+    @Test
+    public void testKvScapeCharSplit() {
+        String text = "k1=v1&\nk\\2=v2\\&&k3=v3";
+        Map<String, String> kvMap = splitKv(text, '&', '=', '\\', null);
+        Assert.assertTrue(kvMap != null && kvMap.size() == 3);
+        Assert.assertTrue(kvMap.get("k3") != null);
+        Assert.assertTrue(kvMap.get("\nk2") != null);
+    }
 }
diff --git a/inlong-sort/sort-formats/format-row/format-kv/src/test/java/org/apache/inlong/sort/formats/kv/KvUtilsTest.java b/inlong-sort/sort-formats/format-row/format-kv/src/test/java/org/apache/inlong/sort/formats/kv/KvUtilsTest.java
index 37bbe758aa..953d607f9c 100644
--- a/inlong-sort/sort-formats/format-row/format-kv/src/test/java/org/apache/inlong/sort/formats/kv/KvUtilsTest.java
+++ b/inlong-sort/sort-formats/format-row/format-kv/src/test/java/org/apache/inlong/sort/formats/kv/KvUtilsTest.java
@@ -210,7 +210,7 @@ public class KvUtilsTest {
     public void testSplitDanglingKey2() {
         Map<String, String> kvMap = splitKv("f1&f2=3", '&',
                 '=', null, null);
-        Assert.assertEquals("3", kvMap.get("f2"));
+        Assert.assertEquals("3", kvMap.get("f1&f2"));
     }
 
     @Test

Reply via email to