Author: sershe
Date: Mon Mar 24 23:25:08 2014
New Revision: 1581114

URL: http://svn.apache.org/r1581114
Log:
HIVE-6682 : nonstaged mapjoin table memory check may be broken (Sergey 
Shelukhin, reviewed by Navis)

Added:
    
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/mapjoin_memcheck.q
    
hive/branches/branch-0.13/ql/src/test/results/clientpositive/mapjoin_memcheck.q.out
Modified:
    
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
    
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/TemporaryHashSinkOperator.java
    
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java
    
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java
    
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
    
hive/branches/branch-0.13/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out

Modified: 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java?rev=1581114&r1=1581113&r2=1581114&view=diff
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
 (original)
+++ 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
 Mon Mar 24 23:25:08 2014
@@ -56,7 +56,7 @@ import org.apache.hadoop.util.Reflection
 public class HashTableSinkOperator extends TerminalOperator<HashTableSinkDesc> 
implements
     Serializable {
   private static final long serialVersionUID = 1L;
-  private static final Log LOG = 
LogFactory.getLog(HashTableSinkOperator.class.getName());
+  protected static final Log LOG = 
LogFactory.getLog(HashTableSinkOperator.class.getName());
 
   /**
    * The expressions for join inputs's join keys.

Modified: 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/TemporaryHashSinkOperator.java
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/TemporaryHashSinkOperator.java?rev=1581114&r1=1581113&r2=1581114&view=diff
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/TemporaryHashSinkOperator.java
 (original)
+++ 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/exec/TemporaryHashSinkOperator.java
 Mon Mar 24 23:25:08 2014
@@ -18,6 +18,8 @@
 
 package org.apache.hadoop.hive.ql.exec;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.HashTableSinkDesc;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
@@ -27,6 +29,12 @@ import java.io.IOException;
 public class TemporaryHashSinkOperator extends HashTableSinkOperator {
   public TemporaryHashSinkOperator(MapJoinDesc desc) {
     conf = new HashTableSinkDesc(desc);
+
+    // Sanity check the config.
+    assert conf.getHashtableMemoryUsage() != 0;
+    if (conf.getHashtableMemoryUsage() == 0) {
+      LOG.error("Hash table memory usage not set in map join operator; 
non-staged load may fail");
+    }
   }
 
   @Override

Modified: 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java?rev=1581114&r1=1581113&r2=1581114&view=diff
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java
 (original)
+++ 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java
 Mon Mar 24 23:25:08 2014
@@ -24,6 +24,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.Stack;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
@@ -58,6 +60,7 @@ import org.apache.hadoop.hive.ql.plan.Ta
  * OOM in group by operator.
  */
 public final class LocalMapJoinProcFactory {
+  private static final Log LOG = 
LogFactory.getLog(LocalMapJoinProcFactory.class);
 
   public static NodeProcessor getJoinProc() {
     return new LocalMapJoinProcessor();
@@ -133,6 +136,9 @@ public final class LocalMapJoinProcFacto
         hashtableMemoryUsage = conf.getFloatVar(
             HiveConf.ConfVars.HIVEHASHTABLEMAXMEMORYUSAGE);
       }
+      mapJoinOp.getConf().setHashTableMemoryUsage(hashtableMemoryUsage);
+      LOG.info("Setting max memory usage to " + hashtableMemoryUsage + " for 
table sink "
+          + (context.isFollowedByGroupBy() ? "" : "not") + " followed by group 
by");
       hashTableSinkOp.getConf().setHashtableMemoryUsage(hashtableMemoryUsage);
 
       // get the last operator for processing big tables

Modified: 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java?rev=1581114&r1=1581113&r2=1581114&view=diff
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java
 (original)
+++ 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/HashTableSinkDesc.java
 Mon Mar 24 23:25:08 2014
@@ -114,6 +114,7 @@ public class HashTableSinkDesc extends J
     this.retainList = clone.getRetainList();
     this.dumpFilePrefix = clone.getDumpFilePrefix();
     this.bucketMapjoinContext = new BucketMapJoinContext(clone);
+    this.hashtableMemoryUsage = clone.getHashTableMemoryUsage();
   }
 
 

Modified: 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java?rev=1581114&r1=1581113&r2=1581114&view=diff
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
 (original)
+++ 
hive/branches/branch-0.13/ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java
 Mon Mar 24 23:25:08 2014
@@ -61,6 +61,9 @@ public class MapJoinDesc extends JoinDes
   // flag for bucket map join. One usage is to set BucketizedHiveInputFormat
   private boolean isBucketMapJoin;
 
+  // Hash table memory usage allowed; used in case of non-staged mapjoin.
+  private float hashtableMemoryUsage;
+
   public MapJoinDesc() {
     bigTableBucketNumMapping = new LinkedHashMap<String, Integer>();
   }
@@ -269,4 +272,12 @@ public class MapJoinDesc extends JoinDes
   public void setBucketMapJoin(boolean isBucketMapJoin) {
     this.isBucketMapJoin = isBucketMapJoin;
   }
+
+  public void setHashTableMemoryUsage(float hashtableMemoryUsage) {
+    this.hashtableMemoryUsage = hashtableMemoryUsage;
+  }
+
+  public float getHashTableMemoryUsage() {
+    return hashtableMemoryUsage;
+  }
 }

Added: 
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/mapjoin_memcheck.q
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/queries/clientpositive/mapjoin_memcheck.q?rev=1581114&view=auto
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/mapjoin_memcheck.q 
(added)
+++ 
hive/branches/branch-0.13/ql/src/test/queries/clientpositive/mapjoin_memcheck.q 
Mon Mar 24 23:25:08 2014
@@ -0,0 +1,16 @@
+
+set hive.auto.convert.join = true;
+
+create table src0 like src;
+insert into table src0 select * from src where src.key < 10;
+
+set hive.mapjoin.check.memory.rows=1;
+
+explain 
+select src1.key as k1, src1.value as v1, src2.key, src2.value
+from src0 src1 inner join src0 src2 on src1.key = src2.key order by k1, v1;
+
+select src1.key as k1, src1.value as v1, src2.key, src2.value
+from src0 src1 inner join src0 src2 on src1.key = src2.key order by k1, v1;
+
+drop table src0;
\ No newline at end of file

Modified: 
hive/branches/branch-0.13/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out?rev=1581114&r1=1581113&r2=1581114&view=diff
==============================================================================
Files 
hive/branches/branch-0.13/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out
 (original) and 
hive/branches/branch-0.13/ql/src/test/results/clientpositive/auto_sortmerge_join_2.q.out
 Mon Mar 24 23:25:08 2014 differ

Added: 
hive/branches/branch-0.13/ql/src/test/results/clientpositive/mapjoin_memcheck.q.out
URL: 
http://svn.apache.org/viewvc/hive/branches/branch-0.13/ql/src/test/results/clientpositive/mapjoin_memcheck.q.out?rev=1581114&view=auto
==============================================================================
--- 
hive/branches/branch-0.13/ql/src/test/results/clientpositive/mapjoin_memcheck.q.out
 (added)
+++ 
hive/branches/branch-0.13/ql/src/test/results/clientpositive/mapjoin_memcheck.q.out
 Mon Mar 24 23:25:08 2014
@@ -0,0 +1,128 @@
+PREHOOK: query: create table src0 like src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table src0 like src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src0
+PREHOOK: query: insert into table src0 select * from src where src.key < 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@src0
+POSTHOOK: query: insert into table src0 select * from src where src.key < 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@src0
+POSTHOOK: Lineage: src0.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: src0.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: explain 
+select src1.key as k1, src1.value as v1, src2.key, src2.value
+from src0 src1 inner join src0 src2 on src1.key = src2.key order by k1, v1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain 
+select src1.key as k1, src1.value as v1, src2.key, src2.value
+from src0 src1 inner join src0 src2 on src1.key = src2.key order by k1, v1
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: src0.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: src0.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src2
+            Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
+            Map Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {value}
+                1 {key} {value}
+              keys:
+                0 key (type: string)
+                1 key (type: string)
+              outputColumnNames: _col0, _col1, _col4, _col5
+              Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE 
Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string), _col4 
(type: string), _col5 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE 
Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE 
Column stats: NONE
+                  value expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string), _col3 (type: string)
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            src1 
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            src1 
+              TableScan
+                alias: src1
+                Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE 
Column stats: NONE
+      Reduce Operator Tree:
+        Extract
+          Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column 
stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE 
Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: select src1.key as k1, src1.value as v1, src2.key, src2.value
+from src0 src1 inner join src0 src2 on src1.key = src2.key order by k1, v1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src0
+#### A masked pattern was here ####
+POSTHOOK: query: select src1.key as k1, src1.value as v1, src2.key, src2.value
+from src0 src1 inner join src0 src2 on src1.key = src2.key order by k1, v1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src0
+#### A masked pattern was here ####
+POSTHOOK: Lineage: src0.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: src0.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+0      val_0   0       val_0
+0      val_0   0       val_0
+0      val_0   0       val_0
+0      val_0   0       val_0
+0      val_0   0       val_0
+0      val_0   0       val_0
+0      val_0   0       val_0
+0      val_0   0       val_0
+0      val_0   0       val_0
+2      val_2   2       val_2
+4      val_4   4       val_4
+5      val_5   5       val_5
+5      val_5   5       val_5
+5      val_5   5       val_5
+5      val_5   5       val_5
+5      val_5   5       val_5
+5      val_5   5       val_5
+5      val_5   5       val_5
+5      val_5   5       val_5
+5      val_5   5       val_5
+8      val_8   8       val_8
+9      val_9   9       val_9
+PREHOOK: query: drop table src0
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src0
+PREHOOK: Output: default@src0
+POSTHOOK: query: drop table src0
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src0
+POSTHOOK: Output: default@src0
+POSTHOOK: Lineage: src0.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: src0.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]


Reply via email to