hive git commit: HIVE-20649: LLAP aware memory manager for Orc writers (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/master 213efd70b -> 369f0f241 HIVE-20649: LLAP aware memory manager for Orc writers (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/369f0f24 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/369f0f24 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/369f0f24 Branch: refs/heads/master Commit: 369f0f241fd3dc90e81d267bf16db61ea90db647 Parents: 213efd7 Author: Prasanth Jayachandran Authored: Sun Oct 14 21:34:08 2018 -0700 Committer: Prasanth Jayachandran Committed: Sun Oct 14 21:34:08 2018 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 ++ .../apache/hadoop/hive/ql/io/orc/OrcFile.java | 48 +++- .../hadoop/hive/ql/io/orc/TestOrcFile.java | 41 + 3 files changed, 91 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/369f0f24/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index cc6239c..29958b3 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1949,6 +1949,10 @@ public class HiveConf extends Configuration { " ETL strategy is used when spending little more time in split generation is acceptable" + " (split generation reads and caches file footers). HYBRID chooses between the above strategies" + " based on heuristics."), + HIVE_ORC_WRITER_LLAP_MEMORY_MANAGER_ENABLED("hive.exec.orc.writer.llap.memory.manager.enabled", true, + "Whether orc writers should use llap-aware memory manager. LLAP aware memory manager will use memory\n" + +"per executor instead of entire heap memory when concurrent orc writers are involved. This will let\n" + +"task fragments to use memory within its limit (memory per executor) when performing ETL in LLAP."), // hive streaming ingest settings HIVE_STREAMING_AUTO_FLUSH_ENABLED("hive.streaming.auto.flush.enabled", true, "Whether to enable memory \n" + http://git-wip-us.apache.org/repos/asf/hive/blob/369f0f24/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java index e7dfb05..e246ac2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java @@ -24,20 +24,29 @@ import java.util.Properties; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.llap.LlapDaemonInfo; +import org.apache.hadoop.hive.llap.LlapUtil; +import org.apache.hadoop.hive.llap.io.api.LlapProxy; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.orc.FileMetadata; +import org.apache.orc.OrcConf; import org.apache.orc.PhysicalWriter; import org.apache.orc.MemoryManager; import org.apache.orc.TypeDescription; +import org.apache.orc.impl.MemoryManagerImpl; import org.apache.orc.impl.OrcTail; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; /** * Contains factory methods to read or write ORC files. */ public final class OrcFile extends org.apache.orc.OrcFile { - + private static final Logger LOG = LoggerFactory.getLogger(OrcFile.class); // unused protected OrcFile() {} @@ -96,6 +105,37 @@ public final class OrcFile extends org.apache.orc.OrcFile { return new ReaderImpl(path, options); } + @VisibleForTesting + static class LlapAwareMemoryManager extends MemoryManagerImpl { +private final double maxLoad; +private final long totalMemoryPool; + +public LlapAwareMemoryManager(Configuration conf) { + super(conf); + maxLoad = OrcConf.MEMORY_POOL.getDouble(conf); + long memPerExecutor = LlapDaemonInfo.INSTANCE.getMemoryPerExecutor(); + totalMemoryPool = (long) (memPerExecutor * maxLoad); + if (LOG.isDebugEnabled()) { +LOG.debug("Using LLAP memory manager for orc writer. memPerExecutor: {} maxLoad: {} totalMemPool: {}", + LlapUtil.humanReadableByteCount(memPerExecutor), maxLoad, LlapUtil.humanReadableByteCount(totalMemoryPool));
hive git commit: HIVE-20649: LLAP aware memory manager for Orc writers (Prasanth Jayachandran reviewed by Sergey Shelukhin)
Repository: hive Updated Branches: refs/heads/branch-3 2048f6262 -> 1ce6c7c2a HIVE-20649: LLAP aware memory manager for Orc writers (Prasanth Jayachandran reviewed by Sergey Shelukhin) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1ce6c7c2 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1ce6c7c2 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1ce6c7c2 Branch: refs/heads/branch-3 Commit: 1ce6c7c2a3a93f0f92078ba8c929a870eaa8134d Parents: 2048f62 Author: Prasanth Jayachandran Authored: Sun Oct 14 21:34:08 2018 -0700 Committer: Prasanth Jayachandran Committed: Sun Oct 14 21:34:42 2018 -0700 -- .../org/apache/hadoop/hive/conf/HiveConf.java | 4 ++ .../apache/hadoop/hive/ql/io/orc/OrcFile.java | 48 +++- .../hadoop/hive/ql/io/orc/TestOrcFile.java | 41 + 3 files changed, 91 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hive/blob/1ce6c7c2/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java -- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3852d79..a04ef38 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1923,6 +1923,10 @@ public class HiveConf extends Configuration { " ETL strategy is used when spending little more time in split generation is acceptable" + " (split generation reads and caches file footers). HYBRID chooses between the above strategies" + " based on heuristics."), + HIVE_ORC_WRITER_LLAP_MEMORY_MANAGER_ENABLED("hive.exec.orc.writer.llap.memory.manager.enabled", true, + "Whether orc writers should use llap-aware memory manager. LLAP aware memory manager will use memory\n" + +"per executor instead of entire heap memory when concurrent orc writers are involved. This will let\n" + +"task fragments to use memory within its limit (memory per executor) when performing ETL in LLAP."), // hive streaming ingest settings HIVE_STREAMING_AUTO_FLUSH_ENABLED("hive.streaming.auto.flush.enabled", true, "Whether to enable memory \n" + http://git-wip-us.apache.org/repos/asf/hive/blob/1ce6c7c2/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java -- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java index e7dfb05..e246ac2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java @@ -24,20 +24,29 @@ import java.util.Properties; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.llap.LlapDaemonInfo; +import org.apache.hadoop.hive.llap.LlapUtil; +import org.apache.hadoop.hive.llap.io.api.LlapProxy; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.orc.FileMetadata; +import org.apache.orc.OrcConf; import org.apache.orc.PhysicalWriter; import org.apache.orc.MemoryManager; import org.apache.orc.TypeDescription; +import org.apache.orc.impl.MemoryManagerImpl; import org.apache.orc.impl.OrcTail; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; /** * Contains factory methods to read or write ORC files. */ public final class OrcFile extends org.apache.orc.OrcFile { - + private static final Logger LOG = LoggerFactory.getLogger(OrcFile.class); // unused protected OrcFile() {} @@ -96,6 +105,37 @@ public final class OrcFile extends org.apache.orc.OrcFile { return new ReaderImpl(path, options); } + @VisibleForTesting + static class LlapAwareMemoryManager extends MemoryManagerImpl { +private final double maxLoad; +private final long totalMemoryPool; + +public LlapAwareMemoryManager(Configuration conf) { + super(conf); + maxLoad = OrcConf.MEMORY_POOL.getDouble(conf); + long memPerExecutor = LlapDaemonInfo.INSTANCE.getMemoryPerExecutor(); + totalMemoryPool = (long) (memPerExecutor * maxLoad); + if (LOG.isDebugEnabled()) { +LOG.debug("Using LLAP memory manager for orc writer. memPerExecutor: {} maxLoad: {} totalMemPool: {}", + LlapUtil.humanReadableByteCount(memPerExecutor), maxLoad,