This is an automated email from the ASF dual-hosted git repository.
okumin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 7aef5c9d8d9 HIVE-29422: Statistics - scaling
numNulls/numTrues/numFalses when scaling rows (#6299)
7aef5c9d8d9 is described below
commit 7aef5c9d8d9345d5b8970c58fd06451957301273
Author: konstantinb <[email protected]>
AuthorDate: Wed Feb 25 21:48:48 2026 -0800
HIVE-29422: Statistics - scaling numNulls/numTrues/numFalses when scaling
rows (#6299)
* HIVE-29422: Statistics - scaling numNulls/numTrues/numFalses when scaling
rows
* HIVE-29422: trigger a rebuild
* HIVE-29422: trigger a rebuild
---
.../org/apache/hadoop/hive/ql/plan/Statistics.java | 23 +-
.../apache/hadoop/hive/ql/plan/TestStatistics.java | 156 +++++++++
.../queries/clientpositive/runtime_stats_scaling.q | 45 +++
.../llap/runtime_stats_scaling.q.out | 369 +++++++++++++++++++++
4 files changed, 592 insertions(+), 1 deletion(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
index 2e36c85e091..78ca89d9fd0 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
@@ -345,9 +345,30 @@ public Statistics scaleToRowCount(long newRowCount, boolean downScaleOnly) {
if (downScaleOnly && newRowCount >= numRows) {
return ret;
}
- // FIXME: using real scaling by new/old ration might yield better results?
ret.numRows = newRowCount;
ret.dataSize = StatsUtils.safeMult(getAvgRowSize(), newRowCount);
+
+ // Adjust column stats to prevent invalid values after scaling: count-based
+ // stats are set to unknown (-1), zero values preserved. Distribution data cleared.
+ if (ret.columnStats != null) {
+ for (ColStatistics cs : ret.columnStats.values()) {
+ if (cs.getCountDistint() > newRowCount) {
+ cs.setCountDistint(newRowCount);
+ }
+ if (cs.getNumNulls() > 0) {
+ cs.setNumNulls(-1);
+ }
+ if (cs.getNumTrues() > 0) {
+ cs.setNumTrues(-1);
+ }
+ if (cs.getNumFalses() > 0) {
+ cs.setNumFalses(-1);
+ }
+ cs.setBitVectors(null);
+ cs.setHistogram(null);
+ }
+ }
+
return ret;
}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/plan/TestStatistics.java b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestStatistics.java
new file mode 100644
index 00000000000..ddbd964311d
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/plan/TestStatistics.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.plan;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+import java.util.Arrays;
+
+import org.junit.Test;
+
+public class TestStatistics {
+
+ @Test
+ public void testScaleToRowCountPreventsNegativeNonNullCount() {
+ Statistics stats = new Statistics(10, 1000, 0, 0);
+ ColStatistics colStats = new ColStatistics("str_col", "string");
+ colStats.setNumNulls(9);
+ colStats.setCountDistint(2);
+ colStats.setAvgColLen(10.0);
+ stats.setColumnStats(Arrays.asList(colStats));
+
+ Statistics scaled = stats.scaleToRowCount(1, false);
+
+ assertEquals(1, scaled.getNumRows());
+ ColStatistics scaledCol = scaled.getColumnStatisticsFromColName("str_col");
+ assertEquals(-1, scaledCol.getNumNulls());
+ }
+
+ @Test
+ public void testScaleToRowCountCapsCountDistinct() {
+ Statistics stats = new Statistics(100, 1000, 0, 0);
+ ColStatistics colStats = new ColStatistics("col1", "int");
+ colStats.setCountDistint(100);
+ colStats.setNumNulls(0);
+ stats.setColumnStats(Arrays.asList(colStats));
+
+ Statistics scaled = stats.scaleToRowCount(10, false);
+
+ ColStatistics scaledCol = scaled.getColumnStatisticsFromColName("col1");
+ assertEquals(10, scaledCol.getCountDistint());
+ }
+
+ @Test
+ public void testScaleToRowCountSetsNumNullsToUnknown() {
+ Statistics stats = new Statistics(100, 1000, 0, 0);
+ ColStatistics colStats = new ColStatistics("col1", "string");
+ colStats.setNumNulls(50);
+ stats.setColumnStats(Arrays.asList(colStats));
+
+ Statistics scaled = stats.scaleToRowCount(10, false);
+
+ ColStatistics scaledCol = scaled.getColumnStatisticsFromColName("col1");
+ assertEquals(-1, scaledCol.getNumNulls());
+ }
+
+ @Test
+ public void testScaleToRowCountSetsBooleanStatsToUnknown() {
+ Statistics stats = new Statistics(100, 1000, 0, 0);
+ ColStatistics colStats = new ColStatistics("bool_col", "boolean");
+ colStats.setNumTrues(30);
+ colStats.setNumFalses(70);
+ colStats.setNumNulls(0);
+ stats.setColumnStats(Arrays.asList(colStats));
+
+ Statistics scaled = stats.scaleToRowCount(10, false);
+
+ ColStatistics scaledCol = scaled.getColumnStatisticsFromColName("bool_col");
+ assertEquals(-1, scaledCol.getNumTrues());
+ assertEquals(-1, scaledCol.getNumFalses());
+ }
+
+ @Test
+ public void testScaleToRowCountPreservesZeroBooleanStats() {
+ Statistics stats = new Statistics(100, 1000, 0, 0);
+ ColStatistics colStats = new ColStatistics("bool_col", "boolean");
+ colStats.setNumTrues(0);
+ colStats.setNumFalses(100);
+ colStats.setNumNulls(0);
+ stats.setColumnStats(Arrays.asList(colStats));
+
+ Statistics scaled = stats.scaleToRowCount(10, false);
+
+ ColStatistics scaledCol = scaled.getColumnStatisticsFromColName("bool_col");
+ assertEquals(0, scaledCol.getNumTrues());
+ assertEquals(-1, scaledCol.getNumFalses());
+ }
+
+ @Test
+ public void testScaleToRowCountClearsDistributionData() {
+ Statistics stats = new Statistics(100, 1000, 0, 0);
+ ColStatistics colStats = new ColStatistics("col1", "int");
+ colStats.setNumNulls(0);
+ colStats.setBitVectors(new byte[]{1, 2, 3});
+ colStats.setHistogram(new byte[]{4, 5, 6});
+ stats.setColumnStats(Arrays.asList(colStats));
+
+ Statistics scaled = stats.scaleToRowCount(10, false);
+
+ ColStatistics scaledCol = scaled.getColumnStatisticsFromColName("col1");
+ assertNull(scaledCol.getBitVectors());
+ assertNull(scaledCol.getHistogram());
+ }
+
+ @Test
+ public void testScaleToRowCountMultipleColumns() {
+ Statistics stats = new Statistics(100, 1000, 0, 0);
+
+ ColStatistics col1 = new ColStatistics("int_col", "int");
+ col1.setNumNulls(20);
+ col1.setCountDistint(80);
+
+ ColStatistics col2 = new ColStatistics("str_col", "string");
+ col2.setNumNulls(0);
+ col2.setCountDistint(50);
+
+ ColStatistics col3 = new ColStatistics("bool_col", "boolean");
+ col3.setNumNulls(10);
+ col3.setNumTrues(40);
+ col3.setNumFalses(50);
+
+ stats.setColumnStats(Arrays.asList(col1, col2, col3));
+
+ Statistics scaled = stats.scaleToRowCount(5, false);
+
+ assertEquals(5, scaled.getNumRows());
+
+ ColStatistics scaledCol1 = scaled.getColumnStatisticsFromColName("int_col");
+ assertEquals(-1, scaledCol1.getNumNulls());
+ assertEquals(5, scaledCol1.getCountDistint());
+
+ ColStatistics scaledCol2 = scaled.getColumnStatisticsFromColName("str_col");
+ assertEquals(0, scaledCol2.getNumNulls());
+ assertEquals(5, scaledCol2.getCountDistint());
+
+ ColStatistics scaledCol3 = scaled.getColumnStatisticsFromColName("bool_col");
+ assertEquals(-1, scaledCol3.getNumNulls());
+ assertEquals(-1, scaledCol3.getNumTrues());
+ assertEquals(-1, scaledCol3.getNumFalses());
+ }
+}
diff --git a/ql/src/test/queries/clientpositive/runtime_stats_scaling.q b/ql/src/test/queries/clientpositive/runtime_stats_scaling.q
new file mode 100644
index 00000000000..378fc576597
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/runtime_stats_scaling.q
@@ -0,0 +1,45 @@
+-- Test runtime statistics scaling: column stats adjustment when row count changes.
+-- When runtime row count is smaller than compile-time estimate, count-based stats
+-- (numNulls, numTrues, numFalses) must be adjusted to prevent invalid values.
+
+set hive.fetch.task.conversion=none;
+
+create table t_runtime_scaling (
+ id int,
+ str_col string,
+ bool_col boolean
+);
+
+-- 10 rows: skewed id values create selectivity mismatch
+-- str_col: 9 rows NULL, 1 non-null (tests numNulls for string)
+-- bool_col: 2 true, 1 false, 7 NULL (tests numNulls for boolean)
+insert into t_runtime_scaling values
+ (1, NULL, NULL), (2, NULL, NULL), (3, NULL, NULL), (4, NULL, NULL),
+ (5, NULL, NULL), (6, NULL, true), (7, NULL, true), (8, NULL, NULL),
+ (9, NULL, NULL), (100, 'only_non_null', false);
+
+analyze table t_runtime_scaling compute statistics;
+analyze table t_runtime_scaling compute statistics for columns;
+
+-- Compile-time: estimates ~50% selectivity (5 rows). Runtime: 1 row passes.
+
+-- Test 1: numNulls scaling for string (str_col has 9 nulls, scaled to 1 row)
+explain
+select str_col from t_runtime_scaling where id > 50;
+
+explain reoptimization
+select str_col from t_runtime_scaling where id > 50;
+
+-- Test 2: numNulls scaling for boolean (bool_col has 7 nulls, scaled to 1 row)
+explain
+select bool_col from t_runtime_scaling where id > 50;
+
+explain reoptimization
+select bool_col from t_runtime_scaling where id > 50;
+
+-- Test 3: combined (both columns)
+explain
+select str_col, bool_col from t_runtime_scaling where id > 50;
+
+explain reoptimization
+select str_col, bool_col from t_runtime_scaling where id > 50;
diff --git a/ql/src/test/results/clientpositive/llap/runtime_stats_scaling.q.out b/ql/src/test/results/clientpositive/llap/runtime_stats_scaling.q.out
new file mode 100644
index 00000000000..bad17e6daea
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/runtime_stats_scaling.q.out
@@ -0,0 +1,369 @@
+PREHOOK: query: create table t_runtime_scaling (
+ id int,
+ str_col string,
+ bool_col boolean
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t_runtime_scaling
+POSTHOOK: query: create table t_runtime_scaling (
+ id int,
+ str_col string,
+ bool_col boolean
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t_runtime_scaling
+PREHOOK: query: insert into t_runtime_scaling values
+ (1, NULL, NULL), (2, NULL, NULL), (3, NULL, NULL), (4, NULL, NULL),
+ (5, NULL, NULL), (6, NULL, true), (7, NULL, true), (8, NULL, NULL),
+ (9, NULL, NULL), (100, 'only_non_null', false)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t_runtime_scaling
+POSTHOOK: query: insert into t_runtime_scaling values
+ (1, NULL, NULL), (2, NULL, NULL), (3, NULL, NULL), (4, NULL, NULL),
+ (5, NULL, NULL), (6, NULL, true), (7, NULL, true), (8, NULL, NULL),
+ (9, NULL, NULL), (100, 'only_non_null', false)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t_runtime_scaling
+POSTHOOK: Lineage: t_runtime_scaling.bool_col SCRIPT []
+POSTHOOK: Lineage: t_runtime_scaling.id SCRIPT []
+POSTHOOK: Lineage: t_runtime_scaling.str_col SCRIPT []
+PREHOOK: query: analyze table t_runtime_scaling compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_runtime_scaling
+PREHOOK: Output: default@t_runtime_scaling
+POSTHOOK: query: analyze table t_runtime_scaling compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_runtime_scaling
+POSTHOOK: Output: default@t_runtime_scaling
+PREHOOK: query: analyze table t_runtime_scaling compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@t_runtime_scaling
+PREHOOK: Output: default@t_runtime_scaling
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table t_runtime_scaling compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@t_runtime_scaling
+POSTHOOK: Output: default@t_runtime_scaling
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select str_col from t_runtime_scaling where id > 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select str_col from t_runtime_scaling where id > 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t_runtime_scaling
+ filterExpr: (id > 50) (type: boolean)
+ Statistics: Num rows: 10 Data size: 210 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (id > 50) (type: boolean)
+ Statistics: Num rows: 5 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: str_col (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain reoptimization
+select str_col from t_runtime_scaling where id > 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+POSTHOOK: query: explain reoptimization
+select str_col from t_runtime_scaling where id > 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+PREHOOK: query: explain reoptimization
+select str_col from t_runtime_scaling where id > 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+POSTHOOK: query: explain reoptimization
+select str_col from t_runtime_scaling where id > 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t_runtime_scaling
+ filterExpr: (id > 50) (type: boolean)
+ Statistics: (RUNTIME) Num rows: 10 Data size: 210 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (id > 50) (type: boolean)
+ Statistics: (RUNTIME) Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: str_col (type: string)
+ outputColumnNames: _col0
+ Statistics: (RUNTIME) Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 85 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select bool_col from t_runtime_scaling where id > 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select bool_col from t_runtime_scaling where id > 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t_runtime_scaling
+ filterExpr: (id > 50) (type: boolean)
+ Statistics: Num rows: 10 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (id > 50) (type: boolean)
+ Statistics: Num rows: 5 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: bool_col (type: boolean)
+ outputColumnNames: _col0
+ Statistics: Num rows: 5 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain reoptimization
+select bool_col from t_runtime_scaling where id > 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+POSTHOOK: query: explain reoptimization
+select bool_col from t_runtime_scaling where id > 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+PREHOOK: query: explain reoptimization
+select bool_col from t_runtime_scaling where id > 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+POSTHOOK: query: explain reoptimization
+select bool_col from t_runtime_scaling where id > 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t_runtime_scaling
+ filterExpr: (id > 50) (type: boolean)
+ Statistics: (RUNTIME) Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (id > 50) (type: boolean)
+ Statistics: (RUNTIME) Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: bool_col (type: boolean)
+ outputColumnNames: _col0
+ Statistics: (RUNTIME) Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain
+select str_col, bool_col from t_runtime_scaling where id > 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+POSTHOOK: query: explain
+select str_col, bool_col from t_runtime_scaling where id > 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t_runtime_scaling
+ filterExpr: (id > 50) (type: boolean)
+ Statistics: Num rows: 10 Data size: 226 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (id > 50) (type: boolean)
+ Statistics: Num rows: 5 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: str_col (type: string), bool_col (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 5 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 5 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain reoptimization
+select str_col, bool_col from t_runtime_scaling where id > 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+POSTHOOK: query: explain reoptimization
+select str_col, bool_col from t_runtime_scaling where id > 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+PREHOOK: query: explain reoptimization
+select str_col, bool_col from t_runtime_scaling where id > 50
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+POSTHOOK: query: explain reoptimization
+select str_col, bool_col from t_runtime_scaling where id > 50
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t_runtime_scaling
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: t_runtime_scaling
+ filterExpr: (id > 50) (type: boolean)
+ Statistics: (RUNTIME) Num rows: 10 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: (id > 50) (type: boolean)
+ Statistics: (RUNTIME) Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: str_col (type: string), bool_col (type: boolean)
+ outputColumnNames: _col0, _col1
+ Statistics: (RUNTIME) Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+