This is an automated email from the ASF dual-hosted git repository.
krisztiankasa pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 9aefef155ae HIVE-26992: Select count(*) query fails with error "java.lang.ArrayIndexOutOfBoundsException: arraycopy: last source index 1024 out of bounds for int[256]" (Zsolt Miskolczi, reviewed by Denys Kuzmenko, Krisztian Kasa)
9aefef155ae is described below
commit 9aefef155ae3afef444944324cd91642b0ae1355
Author: InvisibleProgrammer <[email protected]>
AuthorDate: Tue Feb 14 11:31:35 2023 +0100
HIVE-26992: Select count(*) query fails with error "java.lang.ArrayIndexOutOfBoundsException: arraycopy: last source index 1024 out of bounds for int[256]" (Zsolt Miskolczi, reviewed by Denys Kuzmenko, Krisztian Kasa)
---
.../hive/ql/exec/vector/VectorFilterOperator.java | 2 +-
.../hive/ql/exec/vector/VectorTopNKeyOperator.java | 2 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 9 +-
.../ql/exec/vector/TestVectorFilterOperator.java | 106 ++++++++-------------
.../hive/ql/exec/vector/TestVectorOperator.java | 80 ++++++++++++++++
.../ql/exec/vector/TestVectorTopNKeyOperator.java | 61 ++++++++++++
.../ql/exec/vector/TestVectorizedRowBatch.java | 10 ++
7 files changed, 197 insertions(+), 73 deletions(-)
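For context on the fix below: the exception in the subject line comes from System.arraycopy being asked to copy more elements than the source array holds, which happens when a batch's selected array is shorter than its logical size. The following standalone sketch is not part of the commit; it only illustrates the bound check with hypothetical sizes (256 and 1024, matching the error message) and shows why copying selected.length elements, as the patch does, cannot overrun the source.

    // Illustration only: reproduces the arraycopy bound check behind HIVE-26992.
    public class ArrayCopyBoundsSketch {
      public static void main(String[] args) {
        int batchSize = 1024;               // the batch's logical row count (vrg.size)
        int[] selected = new int[256];      // a selected array shorter than the batch
        int[] temporarySelected = new int[1024];

        try {
          // Old behaviour: copy length taken from the batch size.
          System.arraycopy(selected, 0, temporarySelected, 0, batchSize);
        } catch (ArrayIndexOutOfBoundsException e) {
          // Reports something like: "arraycopy: last source index 1024 out of bounds for int[256]"
          System.out.println(e.getMessage());
        }

        // Fixed behaviour: copy length taken from the source array itself,
        // so the copy can never read past its end.
        System.arraycopy(selected, 0, temporarySelected, 0, selected.length);
        System.out.println("copying selected.length elements succeeded");
      }
    }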
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
index 37c0ed8889a..9cfe9fbec15 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java
@@ -113,7 +113,7 @@ public class VectorFilterOperator extends FilterOperator
//The selected vector represents selected rows.
//Clone the selected vector
- System.arraycopy(vrg.selected, 0, temporarySelected, 0, vrg.size);
+ System.arraycopy(vrg.selected, 0, temporarySelected, 0, vrg.selected.length);
int [] selectedBackup = vrg.selected;
vrg.selected = temporarySelected;
int sizeBackup = vrg.size;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
index 10567c7180a..e44db5c910f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorTopNKeyOperator.java
@@ -118,7 +118,7 @@ public class VectorTopNKeyOperator extends Operator<TopNKeyDesc> implements Vect
incomingBatches++;
// The selected vector represents selected rows.
// Clone the selected vector
- System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.size);
+ System.arraycopy(batch.selected, 0, temporarySelected, 0, batch.selected.length);
int [] selectedBackup = batch.selected;
batch.selected = temporarySelected;
int sizeBackup = batch.size;
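Both this hunk and the VectorFilterOperator one above apply the same idea: the backup copy is sized by the selected array itself rather than by the batch's logical row count, so the clone stays in bounds even when a caller hands the operator a batch whose selected array is shorter than its size. That is exactly the situation the new unit tests below construct by assigning a four-element selected array to a 1024-row batch.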
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index e59d01d09fa..3cf63e24c58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -41,6 +41,7 @@ import java.util.TreeSet;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
@@ -4377,6 +4378,7 @@ public class Vectorizer implements PhysicalPlanResolver {
return false;
}
+ @VisibleForTesting
public static Operator<? extends OperatorDesc> vectorizeFilterOperator(
Operator<? extends OperatorDesc> filterOp, VectorizationContext vContext,
VectorFilterDesc vectorFilterDesc)
@@ -4397,9 +4399,10 @@ public class Vectorizer implements PhysicalPlanResolver {
vContext, vectorFilterDesc);
}
- private static Operator<? extends OperatorDesc> vectorizeTopNKeyOperator(
- Operator<? extends OperatorDesc> topNKeyOperator, VectorizationContext vContext,
- VectorTopNKeyDesc vectorTopNKeyDesc) throws HiveException {
+ @VisibleForTesting
+ public static Operator<? extends OperatorDesc> vectorizeTopNKeyOperator(
+ Operator<? extends OperatorDesc> topNKeyOperator, VectorizationContext vContext,
+ VectorTopNKeyDesc vectorTopNKeyDesc) throws HiveException {
TopNKeyDesc topNKeyDesc = (TopNKeyDesc) topNKeyOperator.getConf();
VectorExpression[] keyExpressions = getVectorExpressions(vContext, topNKeyDesc.getKeyColumns());
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
index 2e601d6fdac..b85bae92b61 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorFilterOperator.java
@@ -21,17 +21,15 @@ package org.apache.hadoop.hive.ql.exec.vector;
import java.util.ArrayList;
import java.util.List;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn;
import org.junit.Assert;
-import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.FilterExprAndExpr;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualDoubleScalar;
-import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -39,57 +37,12 @@ import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.VectorFilterDesc;
import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
/**
* Test cases for vectorized filter operator.
*/
-public class TestVectorFilterOperator {
-
- HiveConf hconf = new HiveConf();
-
- /**
- * Fundamental logic and performance tests for vector filters belong here.
- *
- * For tests about filters to cover specific operator and data type combinations,
- * see also the other filter tests under org.apache.hadoop.hive.ql.exec.vector.expressions
- */
- public static class FakeDataReader {
- private final int size;
- private final VectorizedRowBatch vrg;
- private int currentSize = 0;
- private final int numCols;
- private final int len = 1024;
-
- public FakeDataReader(int size, int numCols) {
- this.size = size;
- this.numCols = numCols;
- vrg = new VectorizedRowBatch(numCols, len);
- for (int i = 0; i < numCols; i++) {
- try {
- Thread.sleep(2);
- } catch (InterruptedException ignore) {}
- vrg.cols[i] = getLongVector(len);
- }
- }
-
- public VectorizedRowBatch getNext() {
- if (currentSize >= size) {
- vrg.size = 0;
- return vrg;
- } else {
- vrg.size = len;
- currentSize += vrg.size;
- vrg.selectedInUse = false;
- return vrg;
- }
- }
-
- private LongColumnVector getLongVector(int len) {
- LongColumnVector lcv = new LongColumnVector(len);
- TestVectorizedRowBatch.setRandomLongCol(lcv);
- return lcv;
- }
- }
+public class TestVectorFilterOperator extends TestVectorOperator{
private VectorFilterOperator getAVectorFilterOperator() throws HiveException {
ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(Long.class, "col1", "table", false);
@@ -110,14 +63,9 @@ public class TestVectorFilterOperator {
@Test
public void testBasicFilterOperator() throws HiveException {
VectorFilterOperator vfo = getAVectorFilterOperator();
- vfo.initialize(hconf, null);
- VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1);
- VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0);
- VectorExpression ve3 = new FilterExprAndExpr();
- ve3.setChildExpressions(new VectorExpression[] {ve1, ve2});
- vfo.setFilterCondition(ve3);
+ prepareVectorFilterOperation(vfo);
- FakeDataReader fdr = new FakeDataReader(1024*1, 3);
+ FakeDataReader fdr = new FakeDataReader(1024*1, 3, FakeDataSampleType.Random);
VectorizedRowBatch vrg = fdr.getNext();
@@ -139,14 +87,9 @@ public class TestVectorFilterOperator {
@Test
public void testBasicFilterLargeData() throws HiveException {
VectorFilterOperator vfo = getAVectorFilterOperator();
- vfo.initialize(hconf, null);
- VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1);
- VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0);
- VectorExpression ve3 = new FilterExprAndExpr();
- ve3.setChildExpressions(new VectorExpression[] {ve1, ve2});
- vfo.setFilterCondition(ve3);
+ prepareVectorFilterOperation(vfo);
- FakeDataReader fdr = new FakeDataReader(16*1024*1024, 3);
+ FakeDataReader fdr = new FakeDataReader(16*1024*1024, 3, FakeDataSampleType.Random);
long startTime = System.currentTimeMillis();
VectorizedRowBatch vrg = fdr.getNext();
@@ -160,7 +103,7 @@ public class TestVectorFilterOperator {
//Base time
- fdr = new FakeDataReader(16*1024*1024, 3);
+ fdr = new FakeDataReader(16*1024*1024, 3, FakeDataSampleType.Random);
long startTime1 = System.currentTimeMillis();
vrg = fdr.getNext();
@@ -178,5 +121,32 @@ public class TestVectorFilterOperator {
long endTime1 = System.currentTimeMillis();
System.out.println("testBaseFilterOperator base Op Time =
"+(endTime1-startTime1));
}
+
+ @Test
+ public void testVectorFilterHasSelectedSmallerThanBatchDoNotThrowException() throws HiveException {
+
+ VectorFilterOperator vfo = getAVectorFilterOperator();
+
+ FakeDataReader fdr = new FakeDataReader(1024*1, 3, FakeDataSampleType.OrderedSequence);
+
+ prepareVectorFilterOperation(vfo);
+
+ VectorizedRowBatch vrg = fdr.getNext();
+
+ vrg.selected = new int[] { 1, 2, 3, 4};
+
+ Assertions.assertDoesNotThrow(() -> vfo.process(vrg, 0));
+ }
+
+ private void prepareVectorFilterOperation(VectorFilterOperator vfo) throws HiveException {
+ vfo.initialize(hiveConf, null);
+
+ VectorExpression ve1 = new FilterLongColGreaterLongColumn(0,1);
+ VectorExpression ve2 = new FilterLongColEqualDoubleScalar(2, 0);
+ VectorExpression ve3 = new FilterExprAndExpr();
+ ve3.setChildExpressions(new VectorExpression[] {ve1, ve2});
+
+ vfo.setFilterCondition(ve3);
+ }
}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorOperator.java
new file mode 100644
index 00000000000..28678af0bc3
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorOperator.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+
+public abstract class TestVectorOperator {
+
+ protected HiveConf hiveConf = new HiveConf();
+
+ public enum FakeDataSampleType {
+ OrderedSequence,
+ Random,
+ Repeated
+ }
+
+ public class FakeDataReader {
+ private final int size;
+ private final VectorizedRowBatch vrg;
+ private int currentSize = 0;
+ private final int len = 1024;
+
+ public FakeDataReader(int size, int numCols, FakeDataSampleType fakeDataSampleType) {
+ this.size = size;
+ vrg = new VectorizedRowBatch(numCols, len);
+ for (int i = 0; i < numCols; i++) {
+ try {
+ Thread.sleep(2);
+ } catch (InterruptedException ignore) {
+ }
+ vrg.cols[i] = getLongVector(fakeDataSampleType);
+ }
+ }
+
+ public VectorizedRowBatch getNext() {
+ if (currentSize >= size) {
+ vrg.size = 0;
+ } else {
+ vrg.size = len;
+ currentSize += vrg.size;
+ vrg.selectedInUse = false;
+ }
+ return vrg;
+ }
+
+ private LongColumnVector getLongVector(FakeDataSampleType fakeDataSampleType) {
+ LongColumnVector lcv = new LongColumnVector(len);
+
+ switch (fakeDataSampleType) {
+ case OrderedSequence:
+ TestVectorizedRowBatch.setOrderedSequenceLongCol(lcv);
+ break;
+ case Random:
+ TestVectorizedRowBatch.setRandomLongCol(lcv);
+ break;
+ case Repeated:
+ TestVectorizedRowBatch.setRepeatingLongCol(lcv);
+ break;
+ }
+
+ return lcv;
+ }
+ }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorTopNKeyOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorTopNKeyOperator.java
new file mode 100644
index 00000000000..77fce5248e0
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorTopNKeyOperator.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.optimizer.physical.Vectorizer;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.plan.TopNKeyDesc;
+import org.apache.hadoop.hive.ql.plan.VectorTopNKeyDesc;
+import org.junit.Test;
+import org.junit.jupiter.api.Assertions;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class TestVectorTopNKeyOperator extends TestVectorOperator {
+
+ @Test
+ public void testTopNHasSelectedSmallerThanBatchDoesNotThrowException() throws HiveException {
+ List<String> columns = new ArrayList<>();
+ columns.add("col1");
+ TopNKeyDesc topNKeyDesc = new TopNKeyDesc();
+ topNKeyDesc.setCheckEfficiencyNumBatches(1);
+ topNKeyDesc.setTopN(2);
+
+ Operator<? extends OperatorDesc> filterOp =
+ OperatorFactory.get(new CompilationOpContext(), topNKeyDesc);
+
+ VectorizationContext vc = new VectorizationContext("name", columns);
+
+ VectorTopNKeyOperator vfo = (VectorTopNKeyOperator) Vectorizer.vectorizeTopNKeyOperator(filterOp, vc, new VectorTopNKeyDesc());
+
+ vfo.initialize(hiveConf, null);
+
+ FakeDataReader fdr = new FakeDataReader(1024, 3, FakeDataSampleType.Repeated);
+ VectorizedRowBatch vrg = fdr.getNext();
+
+ vrg.selected = new int[] { 1, 2, 3, 4};
+
+ Assertions.assertDoesNotThrow(() -> vfo.process(vrg, 0));
+ }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java
index 258b47bbec5..0c2c0204d15 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatch.java
@@ -162,6 +162,16 @@ public class TestVectorizedRowBatch {
}
}
+ /**
+ * Set the vector to sample data that is a monotonically increasing sequence of numbers: 0, 1, 2, ...
+ * @param col
+ */
+ public static void setOrderedSequenceLongCol(LongColumnVector col) {
+ int size = col.vector.length;
+ for(int i = 0; i < size; i++) {
+ col.vector[i] = i;
+ }
+ }
/**
* Set the vector to sample data that repeats an iteration from 0 to 99.