[ 
https://issues.apache.org/jira/browse/HIVE-26074?focusedWorklogId=759009&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-759009
 ]

ASF GitHub Bot logged work on HIVE-26074:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 20/Apr/22 07:35
            Start Date: 20/Apr/22 07:35
    Worklog Time Spent: 10m 
      Work Description: abstractdog commented on code in PR #3187:
URL: https://github.com/apache/hive/pull/3187#discussion_r845932714


##########
ql/src/test/queries/clientpositive/vector_ptf_bounded_start.q:
##########
@@ -3,24 +3,31 @@ set hive.vectorized.execution.enabled=true;
 set hive.vectorized.execution.ptf.enabled=true;
 set hive.fetch.task.conversion=none;
 
-CREATE TABLE vector_ptf_part_simple_text(p_mfgr string, p_name string, p_date 
date, p_retailprice double, rowindex int)
+CREATE TABLE vector_ptf_part_simple_text(p_mfgr string, p_name string, p_date 
date, p_retailprice double,
+        p_type char(1), p_varchar varchar(5), rowindex int)
         ROW FORMAT DELIMITED
-        FIELDS TERMINATED BY '\t'
+        FIELDS TERMINATED BY ','
         STORED AS TEXTFILE;
 LOAD DATA LOCAL INPATH 
'../../data/files/vector_ptf_part_simple_all_datatypes.txt' OVERWRITE INTO 
TABLE vector_ptf_part_simple_text;
 
+SELECT * from vector_ptf_part_simple_text;
+
 CREATE TABLE vector_ptf_part_simple_orc (p_mfgr string, p_name string, p_date 
date, p_timestamp timestamp, 
-p_int int, p_retailprice double, p_decimal decimal(10,4), rowindex int) stored 
as orc;
+p_int int, p_retailprice double, p_decimal decimal(10,4), p_type char(1), 
p_varchar varchar(5),rowindex int) stored

Review Comment:
   let this be p_char instead of p_type



##########
ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/ValueBoundaryScanner.java:
##########
@@ -1214,6 +1223,55 @@ public boolean isEqualPrimitive(String s1, String s2) {
   }
 }
 
+class CharValueBoundaryScanner extends SingleValueBoundaryScanner {
+  public CharValueBoundaryScanner(BoundaryDef start, BoundaryDef end,
+      OrderExpressionDef expressionDef, boolean nullsLast) {
+    super(start, end, expressionDef, nullsLast);
+  }
+
+  @Override
+  public boolean isDistanceGreater(Object v1, Object v2, int amt) {
+    HiveChar s1 = PrimitiveObjectInspectorUtils.getHiveChar(v1,
+        (PrimitiveObjectInspector) expressionDef.getOI());
+    HiveChar s2 = PrimitiveObjectInspectorUtils.getHiveChar(v2,
+        (PrimitiveObjectInspector) expressionDef.getOI());
+    return s1 != null && s2 != null && s1.compareTo(s2) > 0;
+  }
+
+  @Override
+  public boolean isEqual(Object v1, Object v2) {
+    HiveChar s1 = PrimitiveObjectInspectorUtils.getHiveChar(v1,
+        (PrimitiveObjectInspector) expressionDef.getOI());
+    HiveChar s2 = PrimitiveObjectInspectorUtils.getHiveChar(v2,
+        (PrimitiveObjectInspector) expressionDef.getOI());
+    return (s1 == null && s2 == null) || (s1 != null && s1.equals(s2));
+  }
+}
+
+class VarcharValueBoundaryScanner extends SingleValueBoundaryScanner {
+  public VarcharValueBoundaryScanner(BoundaryDef start, BoundaryDef end,
+      OrderExpressionDef expressionDef, boolean nullsLast) {
+    super(start, end, expressionDef, nullsLast);
+  }
+
+  @Override
+  public boolean isDistanceGreater(Object v1, Object v2, int amt) {
+    HiveVarchar s1 = PrimitiveObjectInspectorUtils.getHiveVarchar(v1,
+        (PrimitiveObjectInspector) expressionDef.getOI());
+    HiveVarchar s2 = PrimitiveObjectInspectorUtils.getHiveVarchar(v2,
+        (PrimitiveObjectInspector) expressionDef.getOI());
+    return s1 != null && s2 != null && s1.compareTo(s2) > 0;
+  }
+
+  @Override
+  public boolean isEqual(Object v1, Object v2) {

Review Comment:
   can you please add isEqual testcase to TestValueBoundaryScanner?



##########
ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/ValueBoundaryScanner.java:
##########
@@ -768,6 +774,9 @@ public static SingleValueBoundaryScanner 
getBoundaryScanner(BoundaryDef start, B
     case "string":
       return new StringPrimitiveValueBoundaryScanner(start, end, exprDef, 
nullsLast);
     default:
+      if (typeString.startsWith("char") || typeString.startsWith("varchar")) {

Review Comment:
   the same is handled for decimal above:
   ```
       if (typeString.startsWith("decimal")){
         typeString = "decimal"; //DecimalTypeInfo.getTypeName() includes scale/precision: "decimal(10,4)"
       }
   ```





Issue Time Tracking
-------------------

    Worklog Id:     (was: 759009)
    Time Spent: 50m  (was: 40m)

> PTF Vectorization: BoundaryScanner for varchar
> ----------------------------------------------
>
>                 Key: HIVE-26074
>                 URL: https://issues.apache.org/jira/browse/HIVE-26074
>             Project: Hive
>          Issue Type: Bug
>            Reporter: László Bodor
>            Assignee: László Bodor
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 50m
>  Remaining Estimate: 0h
>
> HIVE-24761 should be extended for varchar, otherwise it fails on varchar type
> {code}
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Internal Error: 
> attempt to setup a Window for typeString: 'varchar(170)'
>       at 
> org.apache.hadoop.hive.ql.udf.ptf.SingleValueBoundaryScanner.getBoundaryScanner(ValueBoundaryScanner.java:773)
>       at 
> org.apache.hadoop.hive.ql.udf.ptf.MultiValueBoundaryScanner$MultiPrimitiveValueBoundaryScanner.<init>(ValueBoundaryScanner.java:1257)
>       at 
> org.apache.hadoop.hive.ql.udf.ptf.MultiValueBoundaryScanner.getScanner(ValueBoundaryScanner.java:1237)
>       at 
> org.apache.hadoop.hive.ql.udf.ptf.ValueBoundaryScanner.getScanner(ValueBoundaryScanner.java:327)
>       at 
> org.apache.hadoop.hive.ql.udf.ptf.PTFRangeUtil.getRange(PTFRangeUtil.java:40)
>       at 
> org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFGroupBatches.finishPartition(VectorPTFGroupBatches.java:442)
>       at 
> org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator.finishPartition(VectorPTFOperator.java:631)
>       at 
> org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator.closeOp(VectorPTFOperator.java:782)
>       at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:731)
>       at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:755)
>       at 
> org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.close(ReduceRecordProcessor.java:383)
>       ... 16 more
> {code}



--
This message was sent by Atlassian Jira
(v8.20.7#820007)

Reply via email to