This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 665eadae33e HIVE-26294: Allow substr to take bigint params (Steve
Carlin, reviewed by Krisztian Kasa)
665eadae33e is described below
commit 665eadae33e45c181511a1f487091a8cbedabe9e
Author: scarlin-cloudera <[email protected]>
AuthorDate: Tue Aug 23 09:25:22 2022 -0700
HIVE-26294: Allow substr to take bigint params (Steve Carlin, reviewed by
Krisztian Kasa)
---
.../org/apache/hadoop/hive/ql/udf/UDFSubstr.java | 69 +++++++++++++++++++---
ql/src/test/queries/clientpositive/udf_substr.q | 13 ++++
.../results/clientpositive/llap/udf_substr.q.out | 41 +++++++++++++
.../compiler/errors/invalid_function_param2.q.out | 2 +-
4 files changed, 117 insertions(+), 8 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
index f9168a99b80..7c6de37c807 100755
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java
@@ -33,6 +33,7 @@ import
org.apache.hadoop.hive.ql.stats.estimator.StatEstimator;
import org.apache.hadoop.hive.ql.stats.estimator.StatEstimatorProvider;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
/**
@@ -55,7 +56,6 @@ import org.apache.hadoop.io.Text;
+ " 'b'")
@VectorizedExpressions({StringSubstrColStart.class,
StringSubstrColStartLen.class})
public class UDFSubstr extends UDF implements StatEstimatorProvider {
-
private final int[] index;
private final Text r;
@@ -64,19 +64,40 @@ public class UDFSubstr extends UDF implements
StatEstimatorProvider {
r = new Text();
}
- public Text evaluate(Text t, IntWritable pos, IntWritable len) {
+ public Text evaluate(Text t, LongWritable pos, LongWritable len) {
+ if ((t == null) || (pos == null) || (len == null)) {
+ return null;
+ }
+
+ long longPos = pos.get();
+ long longLen = len.get();
+ // If an unsupported value is seen, we don't want to return a string
+ // that doesn't match what the user expects, so we return NULL (still
+ // unexpected, of course, but probably better than a bad string).
+ if (longPos > Integer.MAX_VALUE || longLen > Integer.MAX_VALUE ||
+ longPos < Integer.MIN_VALUE || longLen < Integer.MIN_VALUE) {
+ return null;
+ }
+ return evaluateInternal(t, (int) longPos, (int) longLen);
+ }
+
+ public Text evaluate(Text t, IntWritable pos, IntWritable len) {
if ((t == null) || (pos == null) || (len == null)) {
return null;
}
+ return evaluateInternal(t, pos.get(), len.get());
+ }
+
+ private Text evaluateInternal(Text t, int pos, int len) {
r.clear();
- if ((len.get() <= 0)) {
+ if ((len <= 0)) {
return r;
}
String s = t.toString();
- int[] index = makeIndex(pos.get(), len.get(), s.length());
+ int[] index = makeIndex(pos, len, s.length());
if (index == null) {
return r;
}
@@ -112,21 +133,51 @@ public class UDFSubstr extends UDF implements
StatEstimatorProvider {
private final IntWritable maxValue = new IntWritable(Integer.MAX_VALUE);
+ // Even though we are using longs, substr can only deal with ints, so we use
+ // the maximum int value as the maxValue
+ private final LongWritable maxLongValue = new
LongWritable(Integer.MAX_VALUE);
+
public Text evaluate(Text s, IntWritable pos) {
return evaluate(s, pos, maxValue);
}
- public BytesWritable evaluate(BytesWritable bw, IntWritable pos, IntWritable
len) {
+ public Text evaluate(Text s, LongWritable pos) {
+ return evaluate(s, pos, maxLongValue);
+ }
+ public BytesWritable evaluate(BytesWritable bw, LongWritable pos,
LongWritable len) {
if ((bw == null) || (pos == null) || (len == null)) {
return null;
}
- if ((len.get() <= 0)) {
+ long longPos = pos.get();
+ long longLen = len.get();
+ // If an unsupported value is seen, we don't want to return a binary value
+ // that doesn't match what the user expects, so we return NULL (still
+ // unexpected, of course, but probably better than bad bytes).
+ if (longPos > Integer.MAX_VALUE || longLen > Integer.MAX_VALUE ||
+ longPos < Integer.MIN_VALUE || longLen < Integer.MIN_VALUE) {
+ return null;
+ }
+
+ return evaluateInternal(bw, (int) longPos, (int) longLen);
+ }
+
+ public BytesWritable evaluate(BytesWritable bw, IntWritable pos, IntWritable
len) {
+ if ((bw == null) || (pos == null) || (len == null)) {
+ return null;
+ }
+
+ return evaluateInternal(bw, pos.get(), len.get());
+ }
+
+ private BytesWritable evaluateInternal(BytesWritable bw, int pos, int len) {
+
+ if (len <= 0) {
return new BytesWritable();
}
- int[] index = makeIndex(pos.get(), len.get(), bw.getLength());
+ int[] index = makeIndex(pos, len, bw.getLength());
if (index == null) {
return new BytesWritable();
}
@@ -138,6 +189,10 @@ public class UDFSubstr extends UDF implements
StatEstimatorProvider {
return evaluate(bw, pos, maxValue);
}
+ public BytesWritable evaluate(BytesWritable bw, LongWritable pos){
+ return evaluate(bw, pos, maxLongValue);
+ }
+
@Override
public StatEstimator getStatEstimator() {
return new SubStrStatEstimator();
diff --git a/ql/src/test/queries/clientpositive/udf_substr.q
b/ql/src/test/queries/clientpositive/udf_substr.q
index 1784c27c46b..a609536f37e 100644
--- a/ql/src/test/queries/clientpositive/udf_substr.q
+++ b/ql/src/test/queries/clientpositive/udf_substr.q
@@ -76,3 +76,16 @@ SELECT
substr("abc 玩玩玩 abc", 5),
substr("abc 玩玩玩 abc", 5, 3)
FROM src tablesample (1 rows);
+
+SELECT
+ substr('ABC', cast(1 as bigint), cast(2 as bigint)),
+ substr('ABC', cast(1 as bigint))
+FROM src tablesample (1 rows);
+
+SELECT
+ substr('ABC', cast(1 as bigint), cast(2147483649 as bigint))
+FROM src tablesample (1 rows);
+
+SELECT
+ substr('ABC', cast(2147483649 as bigint))
+FROM src tablesample (1 rows);
diff --git a/ql/src/test/results/clientpositive/llap/udf_substr.q.out
b/ql/src/test/results/clientpositive/llap/udf_substr.q.out
index 7c1a0f18e0a..9ffa39b0335 100644
--- a/ql/src/test/results/clientpositive/llap/udf_substr.q.out
+++ b/ql/src/test/results/clientpositive/llap/udf_substr.q.out
@@ -199,3 +199,44 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
玩 玩 玩玩玩 abc 玩玩玩
+PREHOOK: query: SELECT
+ substr('ABC', cast(1 as bigint), cast(2 as bigint)),
+ substr('ABC', cast(1 as bigint))
+FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ substr('ABC', cast(1 as bigint), cast(2 as bigint)),
+ substr('ABC', cast(1 as bigint))
+FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+AB ABC
+PREHOOK: query: SELECT
+ substr('ABC', cast(1 as bigint), cast(2147483649 as bigint))
+FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ substr('ABC', cast(1 as bigint), cast(2147483649 as bigint))
+FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: SELECT
+ substr('ABC', cast(2147483649 as bigint))
+FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+ substr('ABC', cast(2147483649 as bigint))
+FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
diff --git a/ql/src/test/results/compiler/errors/invalid_function_param2.q.out
b/ql/src/test/results/compiler/errors/invalid_function_param2.q.out
index 60606f7a67e..9a0cd65d392 100644
--- a/ql/src/test/results/compiler/errors/invalid_function_param2.q.out
+++ b/ql/src/test/results/compiler/errors/invalid_function_param2.q.out
@@ -1,2 +1,2 @@
Semantic Exception:
-Line 3:36 Wrong arguments ''abc'': No matching method for class
org.apache.hadoop.hive.ql.udf.UDFSubstr with (string, string). Possible
choices: _FUNC_(binary, int) _FUNC_(binary, int, int) _FUNC_(string, int)
_FUNC_(string, int, int)
\ No newline at end of file
+Line 3:36 Wrong arguments ''abc'': No matching method for class
org.apache.hadoop.hive.ql.udf.UDFSubstr with (string, string). Possible
choices: _FUNC_(binary, bigint) _FUNC_(binary, bigint, bigint) _FUNC_(binary,
int) _FUNC_(binary, int, int) _FUNC_(string, bigint) _FUNC_(string, bigint,
bigint) _FUNC_(string, int) _FUNC_(string, int, int)
\ No newline at end of file