This is an automated email from the ASF dual-hosted git repository.

lgbo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 2a2d5bdbcd fix failure on too large double number (#7570)
2a2d5bdbcd is described below

commit 2a2d5bdbcd82057b4740d0a6fabd644ef6cf31a6
Author: lgbo <[email protected]>
AuthorDate: Thu Oct 17 15:07:27 2024 +0800

    fix failure on too large double number (#7570)
---
 .../GlutenClickhouseFunctionSuite.scala            | 22 +++++++++
 .../Functions/SparkFunctionGetJsonObject.h         | 54 +++++++++++++++++++++-
 2 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
index 3012be2e39..beae408d72 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
@@ -326,4 +326,26 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
     }
   }
 
+  test("GLUTEN-7563 too large number in json") { // Regression test: get_json_object over JSON numbers too large for a double.
+    withTable("test_7563") {
+      sql("create table test_7563(a string) using parquet") // Single string column holding raw JSON text.
+      val insert_sql = // Four rows: value above the double range, plain integer, malformed number, five-digit exponent.
+        """
+          |insert into test_7563 values
+          |('{"a":2.696539702293474E308}')
+          |,('{"a":1232}')
+          |,('{"a":1234xxx}')
+          |,('{"a":2.696539702293474E30123}')
+          |""".stripMargin
+      sql(insert_sql)
+      compareResultsAgainstVanillaSpark( // Presumably compares this backend's output with vanilla Spark (inferred from the helper name) -- confirm.
+        """
+          |select a, get_json_object(a, '$.a') from test_7563
+          |""".stripMargin,
+        true,
+        { _ => } // Callback ignores its argument and does nothing.
+      )
+    }
+  }
+
 }
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
index 6ffe096b67..36ada4e0fa 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionGetJsonObject.h
@@ -15,7 +15,10 @@
  * limitations under the License.
  */
 #pragma once
+#include <cerrno>
+#include <limits>
 #include <memory>
+#include <string>
 #include <string_view>
 #include <Columns/ColumnNullable.h>
 #include <Columns/ColumnTuple.h>
@@ -156,10 +159,56 @@ private:
             // LOG_DEBUG(getLogger("GetJsonObject"), "xxx normalizeField. not 
field");
             return nullptr;
         }
-        copyToDst(dst, start_pos, pos - start_pos);
+        if (*start_pos == '"' || *start_pos == '\'')
+        {
+            copyToDst(dst, start_pos, pos - start_pos);
+        }
+        else
+        {
+            // If it's a too large number, replace it with "Infinity".
+            const char * inf_str = "\"\\\"Infinity\\\"\"";
+            size_t inf_str_len = 14;
+            const char * large_e = "308";
+            const auto * ep = find_first_symbols<'e', 'E'>(start_pos, pos);
+            if (pos - ep < 3)
+                copyToDst(dst, start_pos, pos - start_pos);
+            else if (pos - ep > 4 || (pos - ep == 4 and memcmp(ep + 1, 
large_e, 3) >= 0))
+            {
+                if (isTooLargeNumber(start_pos, pos))
+                {
+                    copyToDst(dst, inf_str, inf_str_len);
+                }
+                else
+                {
+                    copyToDst(dst, start_pos, pos - start_pos);
+                }
+            }
+            else
+            {
+                copyToDst(dst, start_pos, pos - start_pos);
+            }
+        }
         return pos;
     }
 
+    inline static bool isTooLargeNumber(const char * start, const char * end) // Returns true when std::stod reports the text [start, end) as out of range.
+    {
+        bool res = false; // Default answer: not too large.
+        try
+        {
+            double num2 = std::stod(String(start, end)); // NOTE(review): num2 is never read; only the exception type thrown here decides the result.
+        }
+        catch (const std::invalid_argument & e) // std::stod could not convert the text at all.
+        {
+            res = false;
+        }
+        catch (const std::out_of_range & e) // NOTE(review): std::stod may also throw this on underflow (e.g. 1E-400), which would be reported as too large -- confirm.
+        {
+            res = true;
+        }
+        return res;
+    }
+
     inline static const char * normalizeString(const char * pos, const char * 
end, char *& dst)
     {
         const auto * start_pos = pos;
@@ -241,7 +290,7 @@ private:
         pos = find_first_symbols<'\''>(pos, end);
         if (!isExpectedChar('\'', pos, end))
         {
-            LOG_DEBUG(getLogger("GetJsonObject"), "xxx 
normalizeSingleQuotesString. not '");
+            // LOG_DEBUG(getLogger("GetJsonObject"), "xxx 
normalizeSingleQuotesString. not '");
             return nullptr;
         }
         pos += 1;
@@ -642,6 +691,7 @@ private:
             for (const auto & field : tokenizer)
             {
                 auto normalized_field = JSONPathNormalizer::normalize(field);
+                // LOG_ERROR(getLogger("JSONPatch"), "xxx field {} -> {}", 
field, normalized_field);
                 required_fields.push_back(normalized_field);
                 tuple_columns.emplace_back(str_type->createColumn());
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to