HIVE-20582: Make hflush in hive proto logging configurable (Prasanth Jayachandran reviewed by Thejas M Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/36c33ca0 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/36c33ca0 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/36c33ca0 Branch: refs/heads/branch-3 Commit: 36c33ca066c99dfdb21223a711c0c3f33c85b943 Parents: 29315fc Author: Prasanth Jayachandran <prasan...@apache.org> Authored: Tue Sep 18 13:10:07 2018 -0700 Committer: Prasanth Jayachandran <prasan...@apache.org> Committed: Tue Sep 18 13:20:02 2018 -0700 ---------------------------------------------------------------------- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 3 +++ .../hadoop/hive/ql/hooks/HiveProtoLoggingHook.java | 14 +++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/36c33ca0/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java ---------------------------------------------------------------------- diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 9523640..4ec6368 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -632,6 +632,9 @@ public class HiveConf extends Configuration { HIVE_PROTO_EVENTS_TTL("hive.hook.proto.events.ttl", "7d", new TimeValidator(TimeUnit.DAYS), "Time-To-Live (TTL) of proto event files before cleanup."), + HIVE_PROTO_FILE_PER_EVENT("hive.hook.proto.file.per.event", false, + "Whether each proto event has to be written to separate file. " + + "(Use this for FS that does not hflush immediately like S3A)"), // Hadoop Configuration Properties // Properties with null values are ignored and exist only for the purpose of giving us http://git-wip-us.apache.org/repos/asf/hive/blob/36c33ca0/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java index 49cba4c..aa3a926 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/hooks/HiveProtoLoggingHook.java @@ -185,6 +185,7 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { private int logFileCount = 0; private ProtoMessageWriter<HiveHookEventProto> writer; private LocalDate writerDate; + private boolean eventPerFile; EventLogger(HiveConf conf, Clock clock) { this.clock = clock; @@ -196,6 +197,8 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { LOG.error(ConfVars.HIVE_PROTO_EVENTS_BASE_PATH.varname + " is not set, logging disabled."); } + eventPerFile = conf.getBoolVar(ConfVars.HIVE_PROTO_FILE_PER_EVENT); + LOG.info("Event per file enabled: {}", eventPerFile); DatePartitionedLogger<HiveHookEventProto> tmpLogger = null; try { if (baseDir != null) { @@ -303,7 +306,16 @@ public class HiveProtoLoggingHook implements ExecuteWithHookContext { writerDate = logger.getDateFromDir(writer.getPath().getParent().getName()); } writer.writeProto(event); - writer.hflush(); + if (eventPerFile) { + if (writer != null) { + LOG.debug("Event per file enabled. Closing proto event file: {}", writer.getPath()); + IOUtils.closeQuietly(writer); + } + // rollover to next file + writer = logger.getWriter(logFileName + "_" + ++logFileCount); + } else { + writer.hflush(); + } return; } catch (IOException e) { // Something wrong with writer, lets close and reopen.