This is an automated email from the ASF dual-hosted git repository.

krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 665eadae33e HIVE-26294: Allow substr to take bigint params (Steve Carlin, reviewed by Krisztian Kasa)
665eadae33e is described below

commit 665eadae33e45c181511a1f487091a8cbedabe9e
Author: scarlin-cloudera <[email protected]>
AuthorDate: Tue Aug 23 09:25:22 2022 -0700

    HIVE-26294: Allow substr to take bigint params (Steve Carlin, reviewed by Krisztian Kasa)
---
 .../org/apache/hadoop/hive/ql/udf/UDFSubstr.java   | 69 +++++++++++++++++++---
 ql/src/test/queries/clientpositive/udf_substr.q    | 13 ++++
 .../results/clientpositive/llap/udf_substr.q.out   | 41 +++++++++++++
 .../compiler/errors/invalid_function_param2.q.out  |  2 +-
 4 files changed, 117 insertions(+), 8 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
index f9168a99b80..7c6de37c807 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.stats.estimator.StatEstimator;
 import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 
 /**
@@ -55,7 +56,6 @@ import org.apache.hadoop.io.Text;
     + "  'b'")
 @VectorizedExpressions({StringSubstrColStart.class, StringSubstrColStartLen.class})
 public class UDFSubstr extends UDF implements StatEstimatorProvider {
-
   private final int[] index;
   private final Text r;
 
@@ -64,19 +64,40 @@ public class UDFSubstr extends UDF implements StatEstimatorProvider {
     r = new Text();
   }
 
-  public Text evaluate(Text t, IntWritable pos, IntWritable len) {
+  public Text evaluate(Text t, LongWritable pos, LongWritable len) {
+    if ((t == null) || (pos == null) || (len == null)) {
+      return null;
+    }
+
+    long longPos = pos.get();
+    long longLen = len.get();
+    // If an unsupported value is seen, we don't want to return a string
+    // that doesn't match what the user expects, so we return NULL (still
+    // unexpected, of course, but probably better than a bad string).
+    if (longPos > Integer.MAX_VALUE || longLen > Integer.MAX_VALUE ||
+        longPos < Integer.MIN_VALUE || longLen < Integer.MIN_VALUE) {
+      return null;
+    }
 
+    return evaluateInternal(t, (int) longPos, (int) longLen);
+  }
+
+  public Text evaluate(Text t, IntWritable pos, IntWritable len) {
     if ((t == null) || (pos == null) || (len == null)) {
       return null;
     }
 
+    return evaluateInternal(t, pos.get(), len.get());
+  }
+
+  private Text evaluateInternal(Text t, int pos, int len) {
     r.clear();
-    if ((len.get() <= 0)) {
+    if ((len <= 0)) {
       return r;
     }
 
     String s = t.toString();
-    int[] index = makeIndex(pos.get(), len.get(), s.length());
+    int[] index = makeIndex(pos, len, s.length());
     if (index == null) {
       return r;
     }
@@ -112,21 +133,51 @@ public class UDFSubstr extends UDF implements StatEstimatorProvider {
 
   private final IntWritable maxValue = new IntWritable(Integer.MAX_VALUE);
 
+  // Even though we are using longs, substr can only deal with ints, so we use
+  // the maximum int value as the maxValue
+  private final LongWritable maxLongValue = new LongWritable(Integer.MAX_VALUE);
+
   public Text evaluate(Text s, IntWritable pos) {
     return evaluate(s, pos, maxValue);
   }
 
-  public BytesWritable evaluate(BytesWritable bw, IntWritable pos, IntWritable len) {
+  public Text evaluate(Text s, LongWritable pos) {
+    return evaluate(s, pos, maxLongValue);
+  }
 
+  public BytesWritable evaluate(BytesWritable bw, LongWritable pos, LongWritable len) {
     if ((bw == null) || (pos == null) || (len == null)) {
       return null;
     }
 
-    if ((len.get() <= 0)) {
+    long longPos = pos.get();
+    long longLen = len.get();
+    // If an unsupported value is seen, we don't want to return bytes
+    // that don't match what the user expects, so we return NULL (still
+    // unexpected, of course, but probably better than bad bytes).
+    if (longPos > Integer.MAX_VALUE || longLen > Integer.MAX_VALUE ||
+        longPos < Integer.MIN_VALUE || longLen < Integer.MIN_VALUE) {
+      return null;
+    }
+
+    return evaluateInternal(bw, (int) longPos, (int) longLen);
+  }
+
+  public BytesWritable evaluate(BytesWritable bw, IntWritable pos, IntWritable len) {
+    if ((bw == null) || (pos == null) || (len == null)) {
+      return null;
+    }
+
+    return evaluateInternal(bw, pos.get(), len.get());
+  }
+
+  private BytesWritable evaluateInternal(BytesWritable bw, int pos, int len) {
+
+    if (len <= 0) {
       return new BytesWritable();
     }
 
-    int[] index = makeIndex(pos.get(), len.get(), bw.getLength());
+    int[] index = makeIndex(pos, len, bw.getLength());
     if (index == null) {
       return new BytesWritable();
     }
@@ -138,6 +189,10 @@ public class UDFSubstr extends UDF implements StatEstimatorProvider {
     return evaluate(bw, pos, maxValue);
   }
 
+  public BytesWritable evaluate(BytesWritable bw, LongWritable pos){
+    return evaluate(bw, pos, maxLongValue);
+  }
+
   @Override
   public StatEstimator getStatEstimator() {
     return new SubStrStatEstimator();
diff --git a/ql/src/test/queries/clientpositive/udf_substr.q b/ql/src/test/queries/clientpositive/udf_substr.q
index 1784c27c46b..a609536f37e 100644
--- a/ql/src/test/queries/clientpositive/udf_substr.q
+++ b/ql/src/test/queries/clientpositive/udf_substr.q
@@ -76,3 +76,16 @@ SELECT
   substr("abc 玩玩玩 abc", 5),
   substr("abc 玩玩玩 abc", 5, 3)
 FROM src tablesample (1 rows);
+
+SELECT
+  substr('ABC', cast(1 as bigint), cast(2 as bigint)),
+  substr('ABC', cast(1 as bigint))
+FROM src tablesample (1 rows);
+
+SELECT
+  substr('ABC', cast(1 as bigint), cast(2147483649 as bigint))
+FROM src tablesample (1 rows);
+
+SELECT
+  substr('ABC', cast(2147483649 as bigint))
+FROM src tablesample (1 rows);
diff --git a/ql/src/test/results/clientpositive/llap/udf_substr.q.out b/ql/src/test/results/clientpositive/llap/udf_substr.q.out
index 7c1a0f18e0a..9ffa39b0335 100644
--- a/ql/src/test/results/clientpositive/llap/udf_substr.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_substr.q.out
@@ -199,3 +199,44 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 玩      玩       玩玩玩 abc 玩玩玩
+PREHOOK: query: SELECT
+  substr('ABC', cast(1 as bigint), cast(2 as bigint)),
+  substr('ABC', cast(1 as bigint))
+FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  substr('ABC', cast(1 as bigint), cast(2 as bigint)),
+  substr('ABC', cast(1 as bigint))
+FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+AB     ABC
+PREHOOK: query: SELECT
+  substr('ABC', cast(1 as bigint), cast(2147483649 as bigint))
+FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  substr('ABC', cast(1 as bigint), cast(2147483649 as bigint))
+FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: SELECT
+  substr('ABC', cast(2147483649 as bigint))
+FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  substr('ABC', cast(2147483649 as bigint))
+FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
diff --git a/ql/src/test/results/compiler/errors/invalid_function_param2.q.out b/ql/src/test/results/compiler/errors/invalid_function_param2.q.out
index 60606f7a67e..9a0cd65d392 100644
--- a/ql/src/test/results/compiler/errors/invalid_function_param2.q.out
+++ b/ql/src/test/results/compiler/errors/invalid_function_param2.q.out
@@ -1,2 +1,2 @@
 Semantic Exception: 
-Line 3:36 Wrong arguments ''abc'': No matching method for class 
org.apache.hadoop.hive.ql.udf.UDFSubstr with (string, string). Possible 
choices: _FUNC_(binary, int)  _FUNC_(binary, int, int)  _FUNC_(string, int)  
_FUNC_(string, int, int)  
\ No newline at end of file
+Line 3:36 Wrong arguments ''abc'': No matching method for class 
org.apache.hadoop.hive.ql.udf.UDFSubstr with (string, string). Possible 
choices: _FUNC_(binary, bigint)  _FUNC_(binary, bigint, bigint)  _FUNC_(binary, 
int)  _FUNC_(binary, int, int)  _FUNC_(string, bigint)  _FUNC_(string, bigint, 
bigint)  _FUNC_(string, int)  _FUNC_(string, int, int)  
\ No newline at end of file

Reply via email to