Repository: hive
Updated Branches:
  refs/heads/master da6638666 -> 1b7f62b05
HIVE-19752: PerfLogger integration for critical Hive-on-S3 paths (Sahil Takiar, reviewed by Vihang Karajgaonkar) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1b7f62b0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1b7f62b0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1b7f62b0 Branch: refs/heads/master Commit: 1b7f62b05daef3992d8637dcf09d0037177f7527 Parents: da66386 Author: Sahil Takiar <takiar.sa...@gmail.com> Authored: Thu May 31 16:51:33 2018 -0500 Committer: Sahil Takiar <stak...@cloudera.com> Committed: Tue Jun 5 08:24:36 2018 -0500 ---------------------------------------------------------------------- .../apache/hadoop/hive/ql/log/PerfLogger.java | 6 +++++ .../apache/hadoop/hive/ql/exec/MoveTask.java | 7 ++++++ .../apache/hadoop/hive/ql/exec/Utilities.java | 5 ++++ .../apache/hadoop/hive/ql/metadata/Hive.java | 26 +++++++++++++++++--- 4 files changed, 41 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/1b7f62b0/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java index 3d6315c..111e614 100644 --- a/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java +++ b/common/src/java/org/apache/hadoop/hive/ql/log/PerfLogger.java @@ -46,6 +46,7 @@ public class PerfLogger { public static final String DO_AUTHORIZATION = "doAuthorization"; public static final String DRIVER_EXECUTE = "Driver.execute"; public static final String INPUT_SUMMARY = "getInputSummary"; + public static final String INPUT_PATHS = "getInputPaths"; public static final String GET_SPLITS = "getSplits"; public static final String RUN_TASKS = "runTasks"; public static final String SERIALIZE_PLAN 
= "serializePlan"; @@ -85,6 +86,11 @@ public class PerfLogger { public static final String SPARK_OPTIMIZE_TASK_TREE = "SparkOptimizeTaskTree"; public static final String SPARK_FLUSH_HASHTABLE = "SparkFlushHashTable."; + public static final String FILE_MOVES = "FileMoves"; + public static final String LOAD_TABLE = "LoadTable"; + public static final String LOAD_PARTITION = "LoadPartition"; + public static final String LOAD_DYNAMIC_PARTITIONS = "LoadDynamicPartitions"; + protected final Map<String, Long> startTimes = new HashMap<String, Long>(); protected final Map<String, Long> endTimes = new HashMap<String, Long>(); http://git-wip-us.apache.org/repos/asf/hive/blob/1b7f62b0/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java index dbda5fd..f80a945 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hive.ql.lockmgr.HiveLock; import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager; import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj; import org.apache.hadoop.hive.ql.lockmgr.LockException; +import org.apache.hadoop.hive.ql.log.PerfLogger; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; @@ -60,6 +61,7 @@ import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.api.StageType; +import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -88,6 +90,9 @@ public class MoveTask extends Task<MoveWork> implements Serializable { 
private void moveFile(Path sourcePath, Path targetPath, boolean isDfsDir) throws HiveException { try { + PerfLogger perfLogger = SessionState.getPerfLogger(); + perfLogger.PerfLogBegin("MoveTask", PerfLogger.FILE_MOVES); + String mesg = "Moving data to " + (isDfsDir ? "" : "local ") + "directory " + targetPath.toString(); String mesg_detail = " from " + sourcePath.toString(); @@ -101,6 +106,8 @@ public class MoveTask extends Task<MoveWork> implements Serializable { FileSystem dstFs = FileSystem.getLocal(conf); moveFileFromDfsToLocal(sourcePath, targetPath, fs, dstFs); } + + perfLogger.PerfLogEnd("MoveTask", PerfLogger.FILE_MOVES); } catch (Exception e) { throw new HiveException("Unable to move source " + sourcePath + " to destination " + targetPath, e); http://git-wip-us.apache.org/repos/asf/hive/blob/1b7f62b0/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 406bea0..2177c33 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -3285,6 +3285,9 @@ public final class Utilities { public static List<Path> getInputPaths(JobConf job, MapWork work, Path hiveScratchDir, Context ctx, boolean skipDummy) throws Exception { + PerfLogger perfLogger = SessionState.getPerfLogger(); + perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.INPUT_PATHS); + Set<Path> pathsProcessed = new HashSet<Path>(); List<Path> pathsToAdd = new LinkedList<Path>(); LockedDriverState lDrvStat = LockedDriverState.getLockedDriverState(); @@ -3373,6 +3376,8 @@ public final class Utilities { } } + perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.INPUT_PATHS); + return finalPathsToAdd; } http://git-wip-us.apache.org/repos/asf/hive/blob/1b7f62b0/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java 
---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java index 107d032..ef7be03 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java @@ -1655,6 +1655,9 @@ public class Hive { boolean isFullAcidTable = AcidUtils.isFullAcidTable(tbl); boolean isTxnTable = AcidUtils.isTransactionalTable(tbl); try { + PerfLogger perfLogger = SessionState.getPerfLogger(); + perfLogger.PerfLogBegin("MoveTask", PerfLogger.LOAD_PARTITION); + // Get the partition object if it already exists Partition oldPart = getPartition(tbl, partSpec, false); /** @@ -1690,8 +1693,8 @@ public class Hive { newPartPath = oldPartPath; } List<Path> newFiles = null; - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin("MoveTask", "FileMoves"); + + perfLogger.PerfLogBegin("MoveTask", PerfLogger.FILE_MOVES); // If config is set, table is not temporary and partition being inserted exists, capture // the list of files added. For not yet existing partitions (insert overwrite to new partition // or dynamic partition inserts), the add partition event will capture the list of files added. @@ -1749,7 +1752,7 @@ public class Hive { tbl.getNumBuckets() > 0, isFullAcidTable); } } - perfLogger.PerfLogEnd("MoveTask", "FileMoves"); + perfLogger.PerfLogEnd("MoveTask", PerfLogger.FILE_MOVES); Partition newTPart = oldPart != null ? 
oldPart : new Partition(tbl, partSpec, newPartPath); alterPartitionSpecInMemory(tbl, partSpec, newTPart.getTPartition(), inheritTableSpecs, newPartPath.toString()); validatePartition(newTPart); @@ -1833,6 +1836,7 @@ public class Hive { } else { setStatsPropAndAlterPartition(hasFollowingStatsTask, tbl, newTPart); } + perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_PARTITION); return newTPart; } catch (IOException e) { LOG.error(StringUtils.stringifyException(e)); @@ -2125,6 +2129,9 @@ private void constructOneLBLocationMap(FileStatus fSta, final boolean hasFollowingStatsTask, final AcidUtils.Operation operation, boolean isInsertOverwrite) throws HiveException { + PerfLogger perfLogger = SessionState.getPerfLogger(); + perfLogger.PerfLogBegin("MoveTask", PerfLogger.LOAD_DYNAMIC_PARTITIONS); + final Map<Map<String, String>, Partition> partitionsMap = Collections.synchronizedMap(new LinkedHashMap<Map<String, String>, Partition>()); @@ -2234,6 +2241,9 @@ private void constructOneLBLocationMap(FileStatus fSta, AcidUtils.toDataOperationType(operation)); } LOG.info("Loaded " + partitionsMap.size() + " partitions"); + + perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_DYNAMIC_PARTITIONS); + return partitionsMap; } catch (TException te) { throw new HiveException("Exception updating metastore for acid table " @@ -2267,6 +2277,10 @@ private void constructOneLBLocationMap(FileStatus fSta, public void loadTable(Path loadPath, String tableName, LoadFileType loadFileType, boolean isSrcLocal, boolean isSkewedStoreAsSubdir, boolean isAcidIUDoperation, boolean hasFollowingStatsTask, Long writeId, int stmtId, boolean isInsertOverwrite) throws HiveException { + + PerfLogger perfLogger = SessionState.getPerfLogger(); + perfLogger.PerfLogBegin("MoveTask", PerfLogger.LOAD_TABLE); + List<Path> newFiles = null; Table tbl = getTable(tableName); assert tbl.getPath() != null : "null==getPath() for " + tbl.getTableName(); @@ -2304,6 +2318,9 @@ private void 
constructOneLBLocationMap(FileStatus fSta, } Utilities.FILE_OP_LOGGER.debug("moving " + loadPath + " to " + tblPath + " (replace = " + loadFileType + ")"); + + perfLogger.PerfLogBegin("MoveTask", PerfLogger.FILE_MOVES); + if (loadFileType == LoadFileType.REPLACE_ALL && !isTxnTable) { //for fullAcid we don't want to delete any files even for OVERWRITE see HIVE-14988/HIVE-17361 boolean isAutopurge = "true".equalsIgnoreCase(tbl.getProperty("auto.purge")); @@ -2321,6 +2338,7 @@ private void constructOneLBLocationMap(FileStatus fSta, throw new HiveException("addFiles: filesystem error in check phase", e); } } + perfLogger.PerfLogEnd("MoveTask", PerfLogger.FILE_MOVES); } if (!this.getConf().getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) { StatsSetupConst.setBasicStatsState(tbl.getParameters(), StatsSetupConst.FALSE); @@ -2354,6 +2372,8 @@ private void constructOneLBLocationMap(FileStatus fSta, alterTable(tbl, environmentContext); fireInsertEvent(tbl, null, (loadFileType == LoadFileType.REPLACE_ALL), newFiles); + + perfLogger.PerfLogEnd("MoveTask", PerfLogger.LOAD_TABLE); } /**