hive git commit: HIVE-20515 : Empty query results when using results cache and query temp dir, results cache dir in different filesystems (Jason Dere via Thejas Nair)

thejas Sat, 08 Sep 2018 22:07:19 -0700

Repository: hive
Updated Branches:
  refs/heads/master b1a917c3a -> 99b8c370f



HIVE-20515 : Empty query results when using results cache and query temp dir, results cache dir in different filesystems (Jason Dere via Thejas Nair)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/99b8c370
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/99b8c370
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/99b8c370

Branch: refs/heads/master
Commit: 99b8c370f6e13862788e86b1977b71df4f3d3bd9
Parents: b1a917c
Author: Jason Dere <[email protected]>
Authored: Sat Sep 8 22:06:25 2018 -0700
Committer: Thejas M Nair <[email protected]>
Committed: Sat Sep 8 22:06:25 2018 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |   1 +
 .../ql/cache/results/QueryResultsCache.java     |  12 +-
 .../clientpositive/results_cache_diff_fs.q      |  18 +++
 .../llap/results_cache_diff_fs.q.out            | 135 +++++++++++++++++++
 4 files changed, 165 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/99b8c370/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index a3a70ec..7497af1 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -408,6 +408,7 @@ minillap.query.files=acid_bucket_pruning.q,\
   orc_ppd_schema_evol_3a.q,\
   global_limit.q,\
   dynamic_partition_pruning_2.q,\
+  results_cache_diff_fs.q,\
   tez_union_dynamic_partition.q,\
   tez_union_dynamic_partition_2.q,\
   unionDistinct_1.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/99b8c370/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
index 1ca7c11..66f3b78 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/cache/results/QueryResultsCache.java
@@ -65,6 +65,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.Entity.Type;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.metadata.events.EventConsumer;
@@ -790,7 +791,16 @@ public final class QueryResultsCache {
     String dirName = UUID.randomUUID().toString();
     Path cachedResultsPath = new Path(cacheDirPath, dirName);
     FileSystem fs = cachedResultsPath.getFileSystem(conf);
-    fs.rename(queryResultsPath, cachedResultsPath);
+    try {
+      boolean resultsMoved = Hive.moveFile(conf, queryResultsPath, cachedResultsPath, false, false, false);
+      if (!resultsMoved) {
+        throw new IOException("Failed to move " + queryResultsPath + " to " + cachedResultsPath);
+      }
+    } catch (IOException err) {
+      throw err;
+    } catch (Exception err) {
+      throw new IOException("Error moving " + queryResultsPath + " to " + cachedResultsPath, err);
+    }
     return cachedResultsPath;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/99b8c370/ql/src/test/queries/clientpositive/results_cache_diff_fs.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/results_cache_diff_fs.q b/ql/src/test/queries/clientpositive/results_cache_diff_fs.q
new file mode 100644
index 0000000..0e15b06
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/results_cache_diff_fs.q
@@ -0,0 +1,18 @@
+--! qt:dataset:src
+
+set hive.query.results.cache.enabled=true;
+set hive.query.results.cache.nontransactional.tables.enabled=true;
+set hive.query.results.cache.directory=pfile://${system:test.tmp.dir}/results_cache_diff_fs;
+set test.comment=hive.exec.scratchdir is;
+set hive.exec.scratchdir;
+
+explain
+select count(*) from src a join src b on (a.key = b.key);
+select count(*) from src a join src b on (a.key = b.key);
+
+set test.comment="Cache should be used for this query";
+set test.comment;
+explain
+select count(*) from src a join src b on (a.key = b.key);
+select count(*) from src a join src b on (a.key = b.key);
+

http://git-wip-us.apache.org/repos/asf/hive/blob/99b8c370/ql/src/test/results/clientpositive/llap/results_cache_diff_fs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/results_cache_diff_fs.q.out b/ql/src/test/results/clientpositive/llap/results_cache_diff_fs.q.out
new file mode 100644
index 0000000..d4e7db8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/results_cache_diff_fs.q.out
@@ -0,0 +1,135 @@
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select count(*) from src a join src b on (a.key = b.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from src a join src b on (a.key = b.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: a
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Map 4 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  filterExpr: key is not null (type: boolean)
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 791 Data size: 6328 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(*) from src a join src b on (a.key = b.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from src a join src b on (a.key = b.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1028
+test.comment="Cache should be used for this query"
+PREHOOK: query: explain
+select count(*) from src a join src b on (a.key = b.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(*) from src a join src b on (a.key = b.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+      Cached Query Result: true
+
+PREHOOK: query: select count(*) from src a join src b on (a.key = b.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+POSTHOOK: query: select count(*) from src a join src b on (a.key = b.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+1028

hive git commit: HIVE-20515 : Empty query results when using results cache and query temp dir, results cache dir in different filesystems (Jason Dere via Thejas Nair)

Reply via email to