[
https://issues.apache.org/jira/browse/HIVE-26074?focusedWorklogId=759009&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-759009
]
ASF GitHub Bot logged work on HIVE-26074:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 20/Apr/22 07:35
Start Date: 20/Apr/22 07:35
Worklog Time Spent: 10m
Work Description: abstractdog commented on code in PR #3187:
URL: https://github.com/apache/hive/pull/3187#discussion_r845932714
##########
ql/src/test/queries/clientpositive/vector_ptf_bounded_start.q:
##########
@@ -3,24 +3,31 @@ set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.ptf.enabled=true;
set hive.fetch.task.conversion=none;
-CREATE TABLE vector_ptf_part_simple_text(p_mfgr string, p_name string, p_date
date, p_retailprice double, rowindex int)
+CREATE TABLE vector_ptf_part_simple_text(p_mfgr string, p_name string, p_date
date, p_retailprice double,
+ p_type char(1), p_varchar varchar(5), rowindex int)
ROW FORMAT DELIMITED
- FIELDS TERMINATED BY '\t'
+ FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH
'../../data/files/vector_ptf_part_simple_all_datatypes.txt' OVERWRITE INTO
TABLE vector_ptf_part_simple_text;
+SELECT * from vector_ptf_part_simple_text;
+
CREATE TABLE vector_ptf_part_simple_orc (p_mfgr string, p_name string, p_date
date, p_timestamp timestamp,
-p_int int, p_retailprice double, p_decimal decimal(10,4), rowindex int) stored
as orc;
+p_int int, p_retailprice double, p_decimal decimal(10,4), p_type char(1),
p_varchar varchar(5),rowindex int) stored
Review Comment:
Please rename this column to p_char instead of p_type.
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/ValueBoundaryScanner.java:
##########
@@ -1214,6 +1223,55 @@ public boolean isEqualPrimitive(String s1, String s2) {
}
}
+class CharValueBoundaryScanner extends SingleValueBoundaryScanner {
+ public CharValueBoundaryScanner(BoundaryDef start, BoundaryDef end,
+ OrderExpressionDef expressionDef, boolean nullsLast) {
+ super(start, end, expressionDef, nullsLast);
+ }
+
+ @Override
+ public boolean isDistanceGreater(Object v1, Object v2, int amt) {
+ HiveChar s1 = PrimitiveObjectInspectorUtils.getHiveChar(v1,
+ (PrimitiveObjectInspector) expressionDef.getOI());
+ HiveChar s2 = PrimitiveObjectInspectorUtils.getHiveChar(v2,
+ (PrimitiveObjectInspector) expressionDef.getOI());
+ return s1 != null && s2 != null && s1.compareTo(s2) > 0;
+ }
+
+ @Override
+ public boolean isEqual(Object v1, Object v2) {
+ HiveChar s1 = PrimitiveObjectInspectorUtils.getHiveChar(v1,
+ (PrimitiveObjectInspector) expressionDef.getOI());
+ HiveChar s2 = PrimitiveObjectInspectorUtils.getHiveChar(v2,
+ (PrimitiveObjectInspector) expressionDef.getOI());
+ return (s1 == null && s2 == null) || (s1 != null && s1.equals(s2));
+ }
+}
+
+class VarcharValueBoundaryScanner extends SingleValueBoundaryScanner {
+ public VarcharValueBoundaryScanner(BoundaryDef start, BoundaryDef end,
+ OrderExpressionDef expressionDef, boolean nullsLast) {
+ super(start, end, expressionDef, nullsLast);
+ }
+
+ @Override
+ public boolean isDistanceGreater(Object v1, Object v2, int amt) {
+ HiveVarchar s1 = PrimitiveObjectInspectorUtils.getHiveVarchar(v1,
+ (PrimitiveObjectInspector) expressionDef.getOI());
+ HiveVarchar s2 = PrimitiveObjectInspectorUtils.getHiveVarchar(v2,
+ (PrimitiveObjectInspector) expressionDef.getOI());
+ return s1 != null && s2 != null && s1.compareTo(s2) > 0;
+ }
+
+ @Override
+ public boolean isEqual(Object v1, Object v2) {
Review Comment:
Can you please add an isEqual test case to TestValueBoundaryScanner?
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/ValueBoundaryScanner.java:
##########
@@ -768,6 +774,9 @@ public static SingleValueBoundaryScanner
getBoundaryScanner(BoundaryDef start, B
case "string":
return new StringPrimitiveValueBoundaryScanner(start, end, exprDef,
nullsLast);
default:
+ if (typeString.startsWith("char") || typeString.startsWith("varchar")) {
Review Comment:
The same is handled for decimal above:
```
if (typeString.startsWith("decimal")){
typeString = "decimal"; //DecimalTypeInfo.getTypeName() includes scale/precision: "decimal(10,4)"
}
```
Issue Time Tracking
-------------------
Worklog Id: (was: 759009)
Time Spent: 50m (was: 40m)
> PTF Vectorization: BoundaryScanner for varchar
> ----------------------------------------------
>
> Key: HIVE-26074
> URL: https://issues.apache.org/jira/browse/HIVE-26074
> Project: Hive
> Issue Type: Bug
> Reporter: László Bodor
> Assignee: László Bodor
> Priority: Major
> Labels: pull-request-available
> Time Spent: 50m
> Remaining Estimate: 0h
>
> HIVE-24761 should be extended to varchar; otherwise it fails on the varchar type:
> {code}
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Internal Error:
> attempt to setup a Window for typeString: 'varchar(170)'
> at
> org.apache.hadoop.hive.ql.udf.ptf.SingleValueBoundaryScanner.getBoundaryScanner(ValueBoundaryScanner.java:773)
> at
> org.apache.hadoop.hive.ql.udf.ptf.MultiValueBoundaryScanner$MultiPrimitiveValueBoundaryScanner.<init>(ValueBoundaryScanner.java:1257)
> at
> org.apache.hadoop.hive.ql.udf.ptf.MultiValueBoundaryScanner.getScanner(ValueBoundaryScanner.java:1237)
> at
> org.apache.hadoop.hive.ql.udf.ptf.ValueBoundaryScanner.getScanner(ValueBoundaryScanner.java:327)
> at
> org.apache.hadoop.hive.ql.udf.ptf.PTFRangeUtil.getRange(PTFRangeUtil.java:40)
> at
> org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFGroupBatches.finishPartition(VectorPTFGroupBatches.java:442)
> at
> org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator.finishPartition(VectorPTFOperator.java:631)
> at
> org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator.closeOp(VectorPTFOperator.java:782)
> at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:731)
> at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:755)
> at
> org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.close(ReduceRecordProcessor.java:383)
> ... 16 more
> {code}
--
This message was sent by Atlassian Jira
(v8.20.7#820007)