Repository: hive
Updated Branches:
  refs/heads/master da6638666 -> 1b7f62b05


HIVE-19752: PerfLogger integration for critical Hive-on-S3 paths (Sahil Takiar, 
reviewed by Vihang Karajgaonkar)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1b7f62b0
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1b7f62b0
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1b7f62b0

Branch: refs/heads/master
Commit: 1b7f62b05daef3992d8637dcf09d0037177f7527
Parents: da66386
Author: Sahil Takiar <takiar.sa...@gmail.com>
Authored: Thu May 31 16:51:33 2018 -0500
Committer: Sahil Takiar <stak...@cloudera.com>
Committed: Tue Jun 5 08:24:36 2018 -0500

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/log/PerfLogger.java   |  6 +++++
 .../apache/hadoop/hive/ql/exec/MoveTask.java    |  7 ++++++
 .../apache/hadoop/hive/ql/exec/Utilities.java   |  5 ++++
 .../apache/hadoop/hive/ql/metadata/Hive.java    | 26 +++++++++++++++++---
 4 files changed, 41 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/1b7f62b0/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java 
b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
index 3d6315c..111e614 100644
--- a/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
+++ b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java
@@ -46,6 +46,7 @@ public class PerfLogger {
   public static final String DO_AUTHORIZATION = "doAuthorization";
   public static final String DRIVER_EXECUTE = "Driver.execute";
   public static final String INPUT_SUMMARY = "getInputSummary";
+  public static final String INPUT_PATHS = "getInputPaths";
   public static final String GET_SPLITS = "getSplits";
   public static final String RUN_TASKS = "runTasks";
   public static final String SERIALIZE_PLAN = "serializePlan";
@@ -85,6 +86,11 @@ public class PerfLogger {
   public static final String SPARK_OPTIMIZE_TASK_TREE = 
"SparkOptimizeTaskTree";
   public static final String SPARK_FLUSH_HASHTABLE = "SparkFlushHashTable.";
 
+  public static final String FILE_MOVES = "FileMoves";
+  public static final String LOAD_TABLE = "LoadTable";
+  public static final String LOAD_PARTITION = "LoadPartition";
+  public static final String LOAD_DYNAMIC_PARTITIONS = "LoadDynamicPartitions";
+
   protected final Map<String, Long> startTimes = new HashMap<String, Long>();
   protected final Map<String, Long> endTimes = new HashMap<String, Long>();
 

http://git-wip-us.apache.org/repos/asf/hive/blob/1b7f62b0/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index dbda5fd..f80a945 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
 import org.apache.hadoop.hive.ql.lockmgr.LockException;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
@@ -60,6 +61,7 @@ import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
 import org.apache.hadoop.hive.ql.plan.MoveWork;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.util.StringUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -88,6 +90,9 @@ public class MoveTask extends Task<MoveWork> implements 
Serializable {
   private void moveFile(Path sourcePath, Path targetPath, boolean isDfsDir)
       throws HiveException {
     try {
+      PerfLogger perfLogger = SessionState.getPerfLogger();
+      perfLogger.PerfLogBegin("MoveTask", PerfLogger.FILE_MOVES);
+
       String mesg = "Moving data to " + (isDfsDir ? "" : "local ") + 
"directory "
           + targetPath.toString();
       String mesg_detail = " from " + sourcePath.toString();
@@ -101,6 +106,8 @@ public class MoveTask extends Task<MoveWork> implements 
Serializable {
         FileSystem dstFs = FileSystem.getLocal(conf);
         moveFileFromDfsToLocal(sourcePath, targetPath, fs, dstFs);
       }
+
+      perfLogger.PerfLogEnd("MoveTask", PerfLogger.FILE_MOVES);
     } catch (Exception e) {
       throw new HiveException("Unable to move source " + sourcePath + " to 
destination "
           + targetPath, e);

http://git-wip-us.apache.org/repos/asf/hive/blob/1b7f62b0/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java 
b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 406bea0..2177c33 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -3285,6 +3285,9 @@ public final class Utilities {
   public static List<Path> getInputPaths(JobConf job, MapWork work, Path 
hiveScratchDir,
       Context ctx, boolean skipDummy) throws Exception {
 
+    PerfLogger perfLogger = SessionState.getPerfLogger();
+    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.INPUT_PATHS);
+
     Set<Path> pathsProcessed = new HashSet<Path>();
     List<Path> pathsToAdd = new LinkedList<Path>();
     LockedDriverState lDrvStat = LockedDriverState.getLockedDriverState();
@@ -3373,6 +3376,8 @@ public final class Utilities {
       }
     }
 
+    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_PATHS);
+
     return finalPathsToAdd;
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/1b7f62b0/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 107d032..ef7be03 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -1655,6 +1655,9 @@ public class Hive {
     boolean isFullAcidTable = AcidUtils.isFullAcidTable(tbl);
     boolean isTxnTable = AcidUtils.isTransactionalTable(tbl);
     try {
+      PerfLogger perfLogger = SessionState.getPerfLogger();
+      perfLogger.PerfLogBegin("MoveTask", PerfLogger.LOAD_PARTITION);
+
       // Get the partition object if it already exists
       Partition oldPart = getPartition(tbl, partSpec, false);
       /**
@@ -1690,8 +1693,8 @@ public class Hive {
         newPartPath = oldPartPath;
       }
       List<Path> newFiles = null;
-      PerfLogger perfLogger = SessionState.getPerfLogger();
-      perfLogger.PerfLogBegin("MoveTask", "FileMoves");
+
+      perfLogger.PerfLogBegin("MoveTask", PerfLogger.FILE_MOVES);
       // If config is set, table is not temporary and partition being inserted 
exists, capture
       // the list of files added. For not yet existing partitions (insert 
overwrite to new partition
       // or dynamic partition inserts), the add partition event will capture 
the list of files added.
@@ -1749,7 +1752,7 @@ public class Hive {
               tbl.getNumBuckets() > 0, isFullAcidTable);
         }
       }
-      perfLogger.PerfLogEnd("MoveTask", "FileMoves");
+      perfLogger.PerfLogEnd("MoveTask", PerfLogger.FILE_MOVES);
       Partition newTPart = oldPart != null ? oldPart : new Partition(tbl, 
partSpec, newPartPath);
       alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), 
inheritTableSpecs, newPartPath.toString());
       validatePartition(newTPart);
@@ -1833,6 +1836,7 @@ public class Hive {
       } else {
         setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart);
       }
+      perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_PARTITION);
       return newTPart;
     } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
@@ -2125,6 +2129,9 @@ private void constructOneLBLocationMap(FileStatus fSta,
       final boolean hasFollowingStatsTask, final AcidUtils.Operation operation,
       boolean isInsertOverwrite) throws HiveException {
 
+    PerfLogger perfLogger = SessionState.getPerfLogger();
+    perfLogger.PerfLogBegin("MoveTask", PerfLogger.LOAD_DYNAMIC_PARTITIONS);
+
     final Map<Map<String, String>, Partition> partitionsMap =
         Collections.synchronizedMap(new LinkedHashMap<Map<String, String>, 
Partition>());
 
@@ -2234,6 +2241,9 @@ private void constructOneLBLocationMap(FileStatus fSta,
                 AcidUtils.toDataOperationType(operation));
       }
       LOG.info("Loaded " + partitionsMap.size() + " partitions");
+
+      perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_DYNAMIC_PARTITIONS);
+
       return partitionsMap;
     } catch (TException te) {
       throw new HiveException("Exception updating metastore for acid table "
@@ -2267,6 +2277,10 @@ private void constructOneLBLocationMap(FileStatus fSta,
   public void loadTable(Path loadPath, String tableName, LoadFileType 
loadFileType, boolean isSrcLocal,
       boolean isSkewedStoreAsSubdir, boolean isAcidIUDoperation, boolean 
hasFollowingStatsTask,
       Long writeId, int stmtId, boolean isInsertOverwrite) throws 
HiveException {
+
+    PerfLogger perfLogger = SessionState.getPerfLogger();
+    perfLogger.PerfLogBegin("MoveTask", PerfLogger.LOAD_TABLE);
+
     List<Path> newFiles = null;
     Table tbl = getTable(tableName);
     assert tbl.getPath() != null : "null==getPath() for " + tbl.getTableName();
@@ -2304,6 +2318,9 @@ private void constructOneLBLocationMap(FileStatus fSta,
       }
       Utilities.FILE_OP_LOGGER.debug("moving " + loadPath + " to " + tblPath
           + " (replace = " + loadFileType + ")");
+
+      perfLogger.PerfLogBegin("MoveTask", PerfLogger.FILE_MOVES);
+
       if (loadFileType == LoadFileType.REPLACE_ALL && !isTxnTable) {
         //for fullAcid we don't want to delete any files even for OVERWRITE 
see HIVE-14988/HIVE-17361
         boolean isAutopurge = 
"true".equalsIgnoreCase(tbl.getProperty("auto.purge"));
@@ -2321,6 +2338,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
           throw new HiveException("addFiles: filesystem error in check phase", 
e);
         }
       }
+      perfLogger.PerfLogEnd("MoveTask", PerfLogger.FILE_MOVES);
     }
     if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
       StatsSetupConst.setBasicStatsState(tbl.getParameters(), 
StatsSetupConst.FALSE);
@@ -2354,6 +2372,8 @@ private void constructOneLBLocationMap(FileStatus fSta,
     alterTable(tbl, environmentContext);
 
     fireInsertEvent(tbl, null, (loadFileType == LoadFileType.REPLACE_ALL), 
newFiles);
+
+    perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_TABLE);
   }
 
   /**

Reply via email to