aajisaka commented on code in PR #4703:
URL: https://github.com/apache/hadoop/pull/4703#discussion_r958182732
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java:
##########
@@ -172,6 +173,9 @@ public AppLogAggregatorImpl(Dispatcher dispatcher,
this.logAggregationContext = logAggregationContext;
this.context = context;
this.nodeId = nodeId;
+ this.enableLocalCleanup =
+ conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLE_LOCAL_CLEANUP,
+ YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLE_LOCAL_CLEANUP);
Review Comment:
If it is false, print a warning message to indicate that this configuration is
intended only for testing and not for any production system.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java:
##########
@@ -234,31 +234,63 @@ private void verifyLocalFileDeletion(
// ensure filesystems were closed
verify(logAggregationService).closeFileSystems(
any(UserGroupInformation.class));
- List<Path> dirList = new ArrayList<>();
- dirList.add(new Path(app1LogDir.toURI()));
- verify(delSrvc, times(2)).delete(argThat(new FileDeletionMatcher(
- delSrvc, user, null, dirList)));
-
- String containerIdStr = container11.toString();
- File containerLogDir = new File(app1LogDir, containerIdStr);
- int count = 0;
- int maxAttempts = 50;
- for (String fileType : new String[] { "stdout", "stderr", "syslog" }) {
- File f = new File(containerLogDir, fileType);
+ boolean filesShouldBeDeleted =
+
this.conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLE_LOCAL_CLEANUP,
+ YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLE_LOCAL_CLEANUP);
+ if (filesShouldBeDeleted) {
+ List<Path> dirList = new ArrayList<>();
+ dirList.add(new Path(app1LogDir.toURI()));
+ verify(delSrvc, times(2)).delete(argThat(new FileDeletionMatcher(
+ delSrvc, user, null, dirList)));
+
+ String containerIdStr = container11.toString();
+ File containerLogDir = new File(app1LogDir, containerIdStr);
+ int count = 0;
+ int maxAttempts = 50;
+ for (String fileType : new String[]{"stdout", "stderr", "syslog"}) {
+ File f = new File(containerLogDir, fileType);
+ count = 0;
+ while ((f.exists()) && (count < maxAttempts)) {
+ count++;
+ Thread.sleep(100);
+ }
+ Assert.assertFalse("File [" + f + "] was not deleted", f.exists());
+ }
count = 0;
- while ((f.exists()) && (count < maxAttempts)) {
+ while ((app1LogDir.exists()) && (count < maxAttempts)) {
count++;
Thread.sleep(100);
}
- Assert.assertFalse("File [" + f + "] was not deleted", f.exists());
- }
- count = 0;
- while ((app1LogDir.exists()) && (count < maxAttempts)) {
- count++;
- Thread.sleep(100);
+ Assert.assertFalse("Directory [" + app1LogDir + "] was not deleted",
+ app1LogDir.exists());
+ } else {
+ List<Path> dirList = new ArrayList<>();
+ dirList.add(new Path(app1LogDir.toURI()));
+ verify(delSrvc, never()).delete(argThat(new FileDeletionMatcher(
+ delSrvc, user, null, dirList)));
+
+ String containerIdStr = container11.toString();
+ File containerLogDir = new File(app1LogDir, containerIdStr);
+ int count = 0;
+ int maxAttempts = 50;
+ for (String fileType : new String[]{"stdout", "stderr", "syslog"}) {
+ File f = new File(containerLogDir, fileType);
+ count = 0;
+ while ((f.exists()) && (count < maxAttempts)) {
+ count++;
+ Thread.sleep(100);
+ }
+ Assert.assertTrue("File [" + f + "] was not deleted", f.exists());
+ }
+ count = 0;
+ while ((app1LogDir.exists()) && (count < maxAttempts)) {
+ count++;
+ Thread.sleep(100);
+ }
+ Assert.assertTrue("Directory [" + app1LogDir + "] was not deleted",
+ app1LogDir.exists());
Review Comment:
It takes at least 50 * 100ms * 2 = 10 sec to run. Do we really need to wait
up to 5 sec in each branch? Also, we could simply wait for a few seconds
instead of running a while loop.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml:
##########
@@ -1548,6 +1548,13 @@
<value>600000</value>
</property>
+ <property>
+ <description>Whether to clean up nodemanager logs when log aggregation is
enabled
Review Comment:
We must warn the users. Could you document that setting this to false disables
the cleanup of NodeManager logs, which will fill up the disk in the long run?
Users should set it to false only for testing purposes.
##########
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java:
##########
@@ -472,7 +476,9 @@ public void run() {
// do post clean up of log directories on any other exception
LOG.error("Error occurred while aggregating the log for the application "
+ appId, e);
- doAppLogAggregationPostCleanUp();
+ if (enableLocalCleanup) {
+ doAppLogAggregationPostCleanUp();
+ }
Review Comment:
As `enableLocalCleanup` isn't a local variable, can we skip the cleanup inside
`doAppLogAggregationPostCleanUp()` itself?
```
private void doAppLogAggregationPostCleanUp() {
if (!enableLocalCleanup) {
return;
}
```
That way we can change only one place.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]