[https://issues.apache.org/jira/browse/HIVE-24746?focusedWorklogId=549548&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-549548]
ASF GitHub Bot logged work on HIVE-24746:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 08/Feb/21 12:41
Start Date: 08/Feb/21 12:41
Worklog Time Spent: 10m
Work Description: pgaref commented on a change in pull request #1950:
URL: https://github.com/apache/hive/pull/1950#discussion_r572010803
##########
File path:
ql/src/test/org/apache/hadoop/hive/ql/udf/ptf/TestValueBoundaryScanner.java
##########
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.ptf;
+
+import java.time.ZoneId;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.common.type.TimestampTZ;
+import org.apache.hadoop.hive.ql.plan.ptf.OrderExpressionDef;
+import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestValueBoundaryScanner {
+
+ @Test
+ public void testLongEquals() {
+ PTFExpressionDef argDef = new PTFExpressionDef();
+ argDef.setOI(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+
+ LongValueBoundaryScanner scanner =
new LongValueBoundaryScanner(null, null, new OrderExpressionDef(argDef), false);
+ LongWritable w1 = new LongWritable(1);
+ LongWritable w2 = new LongWritable(2);
+
+ Assert.assertTrue(scanner.isEqual(w1, w1));
+
+ Assert.assertFalse(scanner.isEqual(w1, w2));
+ Assert.assertFalse(scanner.isEqual(w2, w1));
+
+ Assert.assertFalse(scanner.isEqual(null, w2));
+ Assert.assertFalse(scanner.isEqual(w1, null));
+
+ Assert.assertTrue(scanner.isEqual(null, null));
+ }
+
+ @Test
+ public void testHiveDecimalEquals() {
+ PTFExpressionDef argDef = new PTFExpressionDef();
+ argDef.setOI(PrimitiveObjectInspectorFactory.writableHiveDecimalObjectInspector);
+
+ HiveDecimalValueBoundaryScanner scanner =
new HiveDecimalValueBoundaryScanner(null, null, new OrderExpressionDef(argDef), false);
+ HiveDecimalWritable w1 = new HiveDecimalWritable(1);
+ HiveDecimalWritable w2 = new HiveDecimalWritable(2);
+
+ Assert.assertTrue(scanner.isEqual(w1, w1));
+
+ Assert.assertFalse(scanner.isEqual(w1, w2));
+ Assert.assertFalse(scanner.isEqual(w2, w1));
+
+ Assert.assertFalse(scanner.isEqual(null, w2));
+ Assert.assertFalse(scanner.isEqual(w1, null));
+
+ Assert.assertTrue(scanner.isEqual(null, null));
+ }
+
+ @Test
+ public void testDateEquals() {
+ PTFExpressionDef argDef = new PTFExpressionDef();
+ argDef.setOI(PrimitiveObjectInspectorFactory.writableDateObjectInspector);
+
+ DateValueBoundaryScanner scanner =
new DateValueBoundaryScanner(null, null, new OrderExpressionDef(argDef), false);
+ Date date = new Date();
+ date.setTimeInMillis(1000);
+ DateWritableV2 w1 = new DateWritableV2(date);
+ DateWritableV2 w2 = new DateWritableV2(date);
+ DateWritableV2 w3 = new DateWritableV2(); // empty
+
+ Assert.assertTrue(scanner.isEqual(w1, w2));
+ Assert.assertTrue(scanner.isEqual(w2, w1));
+
+ // empty == epoch
+ Assert.assertTrue(scanner.isEqual(w3, new DateWritableV2(new Date())));
+ // empty == another
+ Assert.assertTrue(scanner.isEqual(w1, w3));
Review comment:
Is this expected?
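
For context, a minimal sketch of why the "empty == another" assertion can hold here (hypothetical class name; grounded in DateWritableV2 storing whole days since the epoch):
{code:java}
import org.apache.hadoop.hive.common.type.Date;
import org.apache.hadoop.hive.serde2.io.DateWritableV2;

public class DateGranularitySketch {
  public static void main(String[] args) {
    Date date = new Date();
    date.setTimeInMillis(1000);               // 1970-01-01T00:00:01, still epoch day 0
    DateWritableV2 w1 = new DateWritableV2(date);
    DateWritableV2 w3 = new DateWritableV2(); // default: 0 days since epoch

    // Both writables carry day 0, so a day-granular comparison sees them as
    // equal; "empty == another" holds for any date on the epoch day.
    System.out.println(w1.getDays() == w3.getDays()); // true
  }
}
{code}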
##########
File path:
ql/src/test/org/apache/hadoop/hive/ql/udf/ptf/TestValueBoundaryScanner.java
##########
@@ -0,0 +1,187 @@
+ @Test
+ public void testTimestampEquals() {
+ PTFExpressionDef argDef = new PTFExpressionDef();
+ argDef.setOI(PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
+
+ TimestampValueBoundaryScanner scanner =
new TimestampValueBoundaryScanner(null, null, new OrderExpressionDef(argDef), false);
+ Timestamp ts = new Timestamp();
+ ts.setTimeInMillis(1000);
+
+ TimestampWritableV2 w1 = new TimestampWritableV2(ts);
+ TimestampWritableV2 w2 = new TimestampWritableV2(ts);
+ TimestampWritableV2 w3 = new TimestampWritableV2(); // empty
+
+ Assert.assertTrue(scanner.isEqual(w1, w2));
+ Assert.assertTrue(scanner.isEqual(w2, w1));
+
+ // empty == epoch
+ Assert.assertTrue(scanner.isEqual(w3, new TimestampWritableV2(new Timestamp())));
+ // empty == another
+ Assert.assertFalse(scanner.isEqual(w1, w3));
Review comment:
This is the opposite behaviour of Date?
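
For context: Timestamp equality is decided at second/nanosecond resolution, so the one-second offset that is invisible to the day-granular Date makes these writables unequal. A minimal sketch (hypothetical class name):
{code:java}
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;

public class TimestampGranularitySketch {
  public static void main(String[] args) {
    Timestamp ts = new Timestamp();
    ts.setTimeInMillis(1000);                      // 1970-01-01T00:00:01
    TimestampWritableV2 w1 = new TimestampWritableV2(ts);
    TimestampWritableV2 w3 = new TimestampWritableV2(); // default: epoch

    // One second past epoch vs epoch: same day, different second.
    System.out.println(w1.getSeconds() == w3.getSeconds()); // false (1 vs 0)
  }
}
{code}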
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
Issue Time Tracking
-------------------
Worklog Id: (was: 549548)
Time Spent: 0.5h (was: 20m)
> PTF: TimestampValueBoundaryScanner can be optimised during range computation
> ----------------------------------------------------------------------------
>
> Key: HIVE-24746
> URL: https://issues.apache.org/jira/browse/HIVE-24746
> Project: Hive
> Issue Type: Improvement
> Reporter: László Bodor
> Assignee: László Bodor
> Priority: Major
> Labels: pull-request-available
> Time Spent: 0.5h
> Remaining Estimate: 0h
>
> During range computation, timestamp ranges become a hotspot because of
> Timestamp comparisons: the scanner has to construct the entire Timestamp
> object via the ObjectInspector (which internally incurs LocalDateTime
> computation, etc.).
>
> All of this is done for an "equals" comparison that could be carried out with
> the "seconds & nanoseconds" already present in the Timestamp.
>
> [https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/ValueBoundaryScanner.java#L852]
>
>
> The request is to explore optimising this code path so that equals() can be
> performed with the "seconds/nanoseconds" fields instead of the entire
> timestamp; a sketch of the idea follows the stack trace below.
> {noformat}
> at org.apache.hadoop.hive.common.type.Timestamp.setTimeInSeconds(Timestamp.java:133)
> at org.apache.hadoop.hive.serde2.io.TimestampWritableV2.populateTimestamp(TimestampWritableV2.java:401)
> at org.apache.hadoop.hive.serde2.io.TimestampWritableV2.getTimestamp(TimestampWritableV2.java:210)
> at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getTimestamp(PrimitiveObjectInspectorUtils.java:1239)
> at org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.getTimestamp(PrimitiveObjectInspectorUtils.java:1181)
> at org.apache.hadoop.hive.ql.udf.ptf.TimestampValueBoundaryScanner.isEqual(ValueBoundaryScanner.java:848)
> at org.apache.hadoop.hive.ql.udf.ptf.SingleValueBoundaryScanner.computeEndCurrentRow(ValueBoundaryScanner.java:593)
> at org.apache.hadoop.hive.ql.udf.ptf.SingleValueBoundaryScanner.computeEnd(ValueBoundaryScanner.java:530)
> at org.apache.hadoop.hive.ql.udf.ptf.BasePartitionEvaluator.getRange(BasePartitionEvaluator.java:273)
> at org.apache.hadoop.hive.ql.udf.ptf.BasePartitionEvaluator.iterate(BasePartitionEvaluator.java:219)
> at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.evaluateWindowFunction(WindowingTableFunction.java:147)
> at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction.access$100(WindowingTableFunction.java:61)
> at org.apache.hadoop.hive.ql.udf.ptf.WindowingTableFunction$WindowingIterator.next(WindowingTableFunction.java:755)
> at org.apache.hadoop.hive.ql.exec.PTFOperator$PTFInvocation.finishPartition(PTFOperator.java:373)
> at org.apache.hadoop.hive.ql.exec.PTFOperator.closeOp(PTFOperator.java:104)
> at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:732)
> at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:756)
> at org.apache.hadoop.hive.ql.exec.tez.ReduceRecordProcessor.close(ReduceRecordProcessor.java:383)
> at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:284)
> at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:250)
> {noformat}
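>
> A minimal sketch of the idea (hypothetical fast path, not the actual patch):
> compare the seconds/nanos fields of the writables directly, so no Timestamp
> (and no internal LocalDateTime) is materialised per comparison.
> {code:java}
> import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
>
> public class TimestampEqualsSketch {
>   // Assumes both operands arrive as TimestampWritableV2, as they do in the
>   // PTF range-computation path quoted above.
>   static boolean isEqual(TimestampWritableV2 t1, TimestampWritableV2 t2) {
>     if (t1 == null || t2 == null) {
>       return t1 == t2; // equal only when both are null
>     }
>     // getSeconds()/getNanos() read the writable's binary form without
>     // building a Timestamp through the ObjectInspector.
>     return t1.getSeconds() == t2.getSeconds() && t1.getNanos() == t2.getNanos();
>   }
> }
> {code}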
--
This message was sent by Atlassian Jira
(v8.3.4#803005)