This is an automated email from the ASF dual-hosted git repository.

lgbo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new f168e8b8b6 fix: fail to normalize JSON text with empty object in it 
(#7595)
f168e8b8b6 is described below

commit f168e8b8b60e249bf0e3bf53168c30c0111f2c15
Author: lgbo <[email protected]>
AuthorDate: Mon Oct 21 10:11:20 2024 +0800

    fix: fail to normalize JSON text with empty object in it (#7595)
---
 .../GlutenClickhouseFunctionSuite.scala             | 21 +++++++++++++++++++++
 .../Functions/SparkFunctionGetJsonObject.h          |  4 +++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
index beae408d72..5cf2bb1a78 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
@@ -348,4 +348,25 @@ class GlutenClickhouseFunctionSuite extends 
GlutenClickHouseTPCHAbstractSuite {
     }
   }
 
+  test("GLUTEN-7591 get_json_object: normalize empty object fail") {
+    withTable("test_7591") {
+      sql("create table test_7591(a string) using parquet")
+      val insert_sql =
+        """
+          |insert into test_7591
+          |select if(id < 10005, concat('{"a":', id), concat('{"a":', id , ', 
"b":{}}')) from
+          |(SELECT explode(sequence(1, 10010)) as id);
+          |""".stripMargin
+      sql(insert_sql)
+      compareResultsAgainstVanillaSpark(
+        """
+          |select get_json_object(a, '$.a') from test_7591
+          |where get_json_object(a, '$.a') is not null
+          |""".stripMargin,
+        true,
+        { _ => }
+      )
+    }
+  }
+
 }
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h 
b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
index 36ada4e0fa..fa9b78194b 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
@@ -394,6 +394,8 @@ private:
                     pos = normalizeSingleQuotesString(pos, end, dst);
                 else if (*pos == '"')
                     pos = normalizeString(pos, end, dst);
+                else if (*pos == '}')
+                    continue;
                 else
                     return nullptr;
             }
@@ -656,7 +658,7 @@ private:
         {
             is_doc_ok = parser.parse(str, doc);
         }
-        if (!is_doc_ok)
+        if (!is_doc_ok && str.size() > 0)
         {
             total_normalized_rows++;
             std::vector<char> buf;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to