This is an automated email from the ASF dual-hosted git repository.
rbalamohan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new f7be5fe HIVE-23917: Reset key access count during eviction in
VectorGroupByOperator (#1306)
f7be5fe is described below
commit f7be5fe240e5e057edff9c14f03f6cc17367bc92
Author: rbalamohan <[email protected]>
AuthorDate: Wed Jul 29 09:13:11 2020 +0530
HIVE-23917: Reset key access count during eviction in VectorGroupByOperator
(#1306)
---
.../hive/ql/exec/vector/VectorAggregationBufferRow.java | 4 ++++
.../hadoop/hive/ql/exec/vector/VectorGroupByOperator.java | 4 +++-
.../hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java | 11 +++++++++++
3 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
index a7ef154..a265e52 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAggregationBufferRow.java
@@ -89,4 +89,8 @@ public class VectorAggregationBufferRow {
public void incrementAccessCount() {
accessed++;
}
+
+ public void resetAccessCount() {
+ accessed = 0;
+ }
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
index 85535f5..02864d9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
@@ -599,8 +599,10 @@ public class VectorGroupByOperator extends Operator<GroupByDesc>
while(iter.hasNext()) {
Map.Entry<KeyWrapper, VectorAggregationBufferRow> pair = iter.next();
if (!all && avgAccess >= 1) {
- // Retain entries when access pattern is > than average access
if (pair.getValue().getAccessCount() > avgAccess) {
+ // resetting to give chance for other entries
+ totalAccessCount -= pair.getValue().getAccessCount();
+ pair.getValue().resetAccessCount();
continue;
}
}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
index c22a833..d6a8548 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java
@@ -692,11 +692,18 @@ public class TestVectorGroupByOperator {
// This processing would trigger flush
for (VectorizedRowBatch unit: data) {
+ long zeroAccessBeforeFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
vgo.process(unit, 0);
long freqElementsAfterFlush = getElementsHigherThan(processingMode.mapKeysAggregationBuffers, avgAccess);
assertTrue("After flush: " + freqElementsAfterFlush + ", before flush: " + numElementsToBeRetained,
(freqElementsAfterFlush >= numElementsToBeRetained));
+
+ // ensure that freq elements are reset for providing chance for others
+ long zeroAccessAfterFlush = getElementsWithZeroAccess(processingMode.mapKeysAggregationBuffers);
+ assertTrue("After flush: " + zeroAccessAfterFlush + ", before flush: " + zeroAccessBeforeFlush,
+ (zeroAccessAfterFlush > zeroAccessBeforeFlush));
+
break;
}
vgo.close(false);
@@ -706,6 +713,10 @@ public class TestVectorGroupByOperator {
return aggMap.values().stream().filter(v -> (v.getAccessCount() > avgAccess)).count();
}
+ long getElementsWithZeroAccess(Map<KeyWrapper, VectorAggregationBufferRow> aggMap) {
+ return aggMap.values().stream().filter(v -> (v.getAccessCount() == 0)).count();
+ }
+
@Test
public void testMaxHTEntriesFlush() throws HiveException {