This is an automated email from the ASF dual-hosted git repository.
lgbo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 2a2d5bdbcd fix failure on too large double number (#7570)
2a2d5bdbcd is described below
commit 2a2d5bdbcd82057b4740d0a6fabd644ef6cf31a6
Author: lgbo <[email protected]>
AuthorDate: Thu Oct 17 15:07:27 2024 +0800
fix failure on too large double number (#7570)
---
.../GlutenClickhouseFunctionSuite.scala | 22 +++++++++
.../Functions/SparkFunctionGetJsonObject.h | 54 +++++++++++++++++++++-
2 files changed, 74 insertions(+), 2 deletions(-)
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
index 3012be2e39..beae408d72 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
@@ -326,4 +326,26 @@ class GlutenClickhouseFunctionSuite extends
GlutenClickHouseTPCHAbstractSuite {
}
}
+ test("GLUTEN-7563 too large number in json") { // Regression test for GLUTEN-7563: get_json_object over JSON numbers that exceed the double range.
+ withTable("test_7563") {
+ sql("create table test_7563(a string) using parquet")
+ val insert_sql = // Rows: a double just past the max finite value, a plain integer, a malformed number (1234xxx), and an exponent far beyond the double limit.
+ """
+ |insert into test_7563 values
+ |('{"a":2.696539702293474E308}')
+ |,('{"a":1232}')
+ |,('{"a":1234xxx}')
+ |,('{"a":2.696539702293474E30123}')
+ |""".stripMargin
+ sql(insert_sql)
+ compareResultsAgainstVanillaSpark( // Runs the query and, per the helper's name, compares the result with vanilla Spark.
+ """
+ |select a, get_json_object(a, '$.a') from test_7563
+ |""".stripMargin,
+ true,
+ { _ => } // No-op callback: ignores its argument and performs no extra checks.
+ )
+ }
+ }
+
}
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
index 6ffe096b67..36ada4e0fa 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
@@ -15,7 +15,10 @@
* limitations under the License.
*/
#pragma once
+#include <cerrno>
+#include <limits>
#include <memory>
+#include <string>
#include <string_view>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnTuple.h>
@@ -156,10 +159,56 @@ private:
// LOG_DEBUG(getLogger("GetJsonObject"), "xxx normalizeField. not
field");
return nullptr;
}
- copyToDst(dst, start_pos, pos - start_pos);
+ if (*start_pos == '"' || *start_pos == '\'')
+ {
+ copyToDst(dst, start_pos, pos - start_pos);
+ }
+ else
+ {
+ // If it's a too large number, replace it with "Infinity".
+ const char * inf_str = "\"\\\"Infinity\\\"\"";
+ size_t inf_str_len = 14;
+ const char * large_e = "308";
+ const auto * ep = find_first_symbols<'e', 'E'>(start_pos, pos);
+ if (pos - ep < 3)
+ copyToDst(dst, start_pos, pos - start_pos);
+ else if (pos - ep > 4 || (pos - ep == 4 and memcmp(ep + 1,
large_e, 3) >= 0))
+ {
+ if (isTooLargeNumber(start_pos, pos))
+ {
+ copyToDst(dst, inf_str, inf_str_len);
+ }
+ else
+ {
+ copyToDst(dst, start_pos, pos - start_pos);
+ }
+ }
+ else
+ {
+ copyToDst(dst, start_pos, pos - start_pos);
+ }
+ }
return pos;
}
+ inline static bool isTooLargeNumber(const char * start, const char * end) // Returns true iff std::stod throws std::out_of_range for the text in [start, end).
+ {
+ bool res = false; // Default result; also what is returned when parsing succeeds without throwing.
+ try
+ {
+ double num2 = std::stod(String(start, end)); // The parsed value is never read; only the exception (if any) matters.
+ }
+ catch (const std::invalid_argument & e)
+ {
+ res = false; // Text that cannot be parsed as a number is not reported as too large.
+ }
+ catch (const std::out_of_range & e)
+ {
+ res = true; // NOTE(review): std::stod can also throw out_of_range on underflow (e.g. "1E-400"), which the caller would then replace with Infinity — confirm whether that is intended.
+ }
+ return res;
+ }
+
inline static const char * normalizeString(const char * pos, const char *
end, char *& dst)
{
const auto * start_pos = pos;
@@ -241,7 +290,7 @@ private:
pos = find_first_symbols<'\''>(pos, end);
if (!isExpectedChar('\'', pos, end))
{
- LOG_DEBUG(getLogger("GetJsonObject"), "xxx
normalizeSingleQuotesString. not '");
+ // LOG_DEBUG(getLogger("GetJsonObject"), "xxx
normalizeSingleQuotesString. not '");
return nullptr;
}
pos += 1;
@@ -642,6 +691,7 @@ private:
for (const auto & field : tokenizer)
{
auto normalized_field = JSONPathNormalizer::normalize(field);
+ // LOG_ERROR(getLogger("JSONPatch"), "xxx field {} -> {}",
field, normalized_field);
required_fields.push_back(normalized_field);
tuple_columns.emplace_back(str_type->createColumn());
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]