This is an automated email from the ASF dual-hosted git repository.
lgbo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new f168e8b8b6 fix: fail to normalize json text with empty object in it
(#7595)
f168e8b8b6 is described below
commit f168e8b8b60e249bf0e3bf53168c30c0111f2c15
Author: lgbo <[email protected]>
AuthorDate: Mon Oct 21 10:11:20 2024 +0800
fix: fail to normalize json text with empty object in it (#7595)
---
.../GlutenClickhouseFunctionSuite.scala | 21 +++++++++++++++++++++
.../Functions/SparkFunctionGetJsonObject.h | 4 +++-
2 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
index beae408d72..5cf2bb1a78 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
@@ -348,4 +348,25 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
}
}
+ test("GLUTEN-7591 get_json_object: normalize empty object fail") {
+ withTable("test_7591") {
+ sql("create table test_7591(a string) using parquet")
+ val insert_sql =
+ """
+ |insert into test_7591
+ |select if(id < 10005, concat('{"a":', id), concat('{"a":', id , ', "b":{}}')) from
+ |(SELECT explode(sequence(1, 10010)) as id);
+ |""".stripMargin
+ sql(insert_sql)
+ compareResultsAgainstVanillaSpark(
+ """
+ |select get_json_object(a, '$.a') from test_7591
+ |where get_json_object(a, '$.a') is not null
+ |""".stripMargin,
+ true,
+ { _ => }
+ )
+ }
+ }
+
}
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
index 36ada4e0fa..fa9b78194b 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
@@ -394,6 +394,8 @@ private:
pos = normalizeSingleQuotesString(pos, end, dst);
else if (*pos == '"')
pos = normalizeString(pos, end, dst);
+ else if (*pos == '}')
+ continue;
else
return nullptr;
}
@@ -656,7 +658,7 @@ private:
{
is_doc_ok = parser.parse(str, doc);
}
- if (!is_doc_ok)
+ if (!is_doc_ok && str.size() > 0)
{
total_normalized_rows++;
std::vector<char> buf;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]