This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new d31086c0a74 HIVE-27317: Temporary (local) session files cleanup
(Sercan Tekin, reviewed by Denys Kuzmenko, Laszlo Vegh)
d31086c0a74 is described below
commit d31086c0a74b8bb48db774379ce6b7ab7d9233ff
Author: Sercan Tekin <[email protected]>
AuthorDate: Thu Jun 22 04:39:17 2023 -0400
HIVE-27317: Temporary (local) session files cleanup (Sercan Tekin, reviewed
by Denys Kuzmenko, Laszlo Vegh)
Closes #4403
---
.../ql/session/TestClearDanglingScratchDir.java | 77 ++++++++++++++++++++++
.../hive/ql/session/ClearDanglingScratchDir.java | 67 ++++++++++++++-----
2 files changed, 128 insertions(+), 16 deletions(-)
diff --git
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java
index 82d3db5910b..8645812ab95 100644
---
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java
+++
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/session/TestClearDanglingScratchDir.java
@@ -18,16 +18,19 @@
package org.apache.hadoop.hive.ql.session;
import java.io.ByteArrayOutputStream;
+import java.io.File;
import java.io.PrintStream;
import java.util.UUID;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
+import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.util.Shell;
import org.junit.AfterClass;
import org.junit.Assert;
@@ -43,6 +46,8 @@ public class TestClearDanglingScratchDir {
private ByteArrayOutputStream stderr;
private PrintStream origStdoutPs;
private PrintStream origStderrPs;
+ private static Path customScratchDir;
+ private static Path customLocalTmpDir;
@BeforeClass
static public void oneTimeSetup() throws Exception {
@@ -64,6 +69,11 @@ public class TestClearDanglingScratchDir {
@AfterClass
static public void shutdown() throws Exception {
m_dfs.shutdown();
+
+ // Need to make sure we are deleting in the correct FS
+ FileSystem fs = customScratchDir.getFileSystem(new Configuration());
+ fs.delete(customScratchDir, true);
+ fs.delete(customLocalTmpDir, true);
}
public void redirectStdOutErr() {
@@ -129,4 +139,71 @@ public class TestClearDanglingScratchDir {
Assert.assertEquals(StringUtils.countMatches(stderr.toString(),
"removed"), 1);
ss.close();
}
+
+ /**
+ * Testing behaviour of ClearDanglingScratchDir service over local tmp
files/dirs
+ * @throws Exception
+ */
+ @Test
+ public void testLocalDanglingFilesCleaning() throws Exception {
+ HiveConf conf = new HiveConf();
+ conf.set("fs.default.name", "file:///");
+ String tmpDir = System.getProperty("test.tmp.dir");
+ conf.set("hive.exec.scratchdir", tmpDir + "/hive-27317-hdfsscratchdir");
+ conf.set("hive.exec.local.scratchdir", tmpDir +
"/hive-27317-localscratchdir");
+ FileSystem fs = FileSystem.get(conf);
+
+ // Constants
+ String appId = "appId_" + System.currentTimeMillis();
+ String userName = System.getProperty("user.name");
+ String hdfs = "hdfs";
+ String inuse = "inuse.lck";
+ String l = File.separator;
+
+ // Simulating hdfs dangling dir and its inuse.lck file
+ // Note: Give scratch dirs all the write permissions
+ FsPermission allPermissions = new FsPermission((short)00777);
+ customScratchDir = new Path(HiveConf.getVar(conf,
HiveConf.ConfVars.SCRATCHDIR));
+ Utilities.createDirsWithPermission(conf, customScratchDir, allPermissions,
true);
+ Path hdfsRootDir = new Path(customScratchDir + l + userName + l + hdfs);
+ Path hdfsSessionDir = new Path(hdfsRootDir + l + userName + l + appId);
+ Path hdfsSessionLock = new Path(hdfsSessionDir + l + inuse);
+ fs.create(hdfsSessionLock);
+
+ // Simulating local dangling files
+ customLocalTmpDir = new Path (HiveConf.getVar(conf,
HiveConf.ConfVars.LOCALSCRATCHDIR));
+ Path localSessionDir = new Path(customLocalTmpDir + l + appId);
+ Path localPipeOutFileRemove = new Path(customLocalTmpDir + l
+ + appId + "-started-with-session-name.pipeout");
+ Path localPipeOutFileNotRemove = new Path(customLocalTmpDir + l
+ + "not-started-with-session-name-" + appId + ".pipeout");
+ Path localPipeOutFileFailRemove = new Path(customLocalTmpDir + l
+ + appId + "-started-with-session-name-but-fail-delete.pipeout");
+
+ // Create dirs/files
+ Utilities.createDirsWithPermission(conf, localSessionDir, allPermissions,
true);
+ fs.create(localPipeOutFileRemove);
+ fs.create(localPipeOutFileNotRemove);
+ fs.create(localPipeOutFileFailRemove);
+
+ // Set permission for localPipeOutFileFailRemove file as not writable
+ // This will prevent the file from being deleted, as we check whether it is writable
or not first
+ fs.setPermission(localPipeOutFileFailRemove,
FsPermission.valueOf("-r--r--r--"));
+
+ // The main service will be identifying which session files/dirs are
dangling
+ ClearDanglingScratchDir clearDanglingScratchDirMain = new
ClearDanglingScratchDir(false,
+ false, true, hdfsRootDir.toString(), conf);
+ clearDanglingScratchDirMain.run();
+
+ // localSessionDir and localPipeOutFileRemove should be removed
+ // localPipeOutFileNotRemove and localPipeOutFileFailRemove should not be
removed
+ Assert.assertFalse("Local session dir '" + localSessionDir
+ + "' still exists, should have been removed!",
fs.exists(localSessionDir));
+ Assert.assertFalse("Local .pipeout file '" + localPipeOutFileRemove
+ + "' still exists, should have been removed!",
fs.exists(localPipeOutFileRemove));
+ Assert.assertTrue("Local .pipeout file '" + localPipeOutFileNotRemove
+ + "' does not exist, should have not been removed!",
fs.exists(localPipeOutFileNotRemove));
+ Assert.assertTrue("Local .pipeout file '" + localPipeOutFileFailRemove
+ + "' does not exist, should have not been removed!",
fs.exists(localPipeOutFileFailRemove));
+ }
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java
b/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java
index 8d83a00e476..62105dcec5a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/ClearDanglingScratchDir.java
@@ -17,10 +17,13 @@
*/
package org.apache.hadoop.hive.ql.session;
+import java.io.File;
import java.io.IOException;
+import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
@@ -53,6 +56,9 @@ import org.slf4j.LoggerFactory;
* lease after 10 min, ie, the HDFS file hold by the dead
HiveCli/HiveServer2 is writable
again after 10 min. Once it becomes writable, cleardanglingscratchDir
will be able to
* remove it
+ * 4. Additional functionality; once it is decided which session scratch dirs
are residual,
+ * while removing them from hdfs, we will remove them from local tmp
location as well.
+ * Please see {@link ClearDanglingScratchDir#removeLocalTmpFiles(String,
String)}.
*/
public class ClearDanglingScratchDir implements Runnable {
private static final Logger LOG =
LoggerFactory.getLogger(ClearDanglingScratchDir.class);
@@ -141,25 +147,26 @@ public class ClearDanglingScratchDir implements Runnable {
// if the file is currently held by a writer
if(AlreadyBeingCreatedException.class.getName().equals(eAppend.getClassName())){
inuse = true;
- } else if
(UnsupportedOperationException.class.getName().equals(eAppend.getClassName())) {
- // Append is not supported in the cluster, try to use create
- try {
- IOUtils.closeStream(fs.create(lockFilePath, false));
- } catch (RemoteException eCreate) {
- if
(AlreadyBeingCreatedException.class.getName().equals(eCreate.getClassName())){
- // If the file is held by a writer, will throw
AlreadyBeingCreatedException
- inuse = true;
- } else {
- consoleMessage("Unexpected error:" + eCreate.getMessage());
- }
- } catch (FileAlreadyExistsException eCreateNormal) {
- // Otherwise, throw FileAlreadyExistsException, which means
the file owner is
- // dead
- removable = true;
- }
} else {
consoleMessage("Unexpected error:" + eAppend.getMessage());
}
+ } catch (UnsupportedOperationException eUnsupported) {
+ // In Hadoop-3, append method is not supported.
+ // This is an alternative check to determine whether a file is in
use or not.
+ // Trying to open the file. If it is in use, it will throw
IOException.
+ try {
+ IOUtils.closeStream(fs.create(lockFilePath, false));
+ } catch (RemoteException eCreate) {
+ if
(AlreadyBeingCreatedException.class.getName().equals(eCreate.getClassName())){
+ // If the file is held by a writer, will throw
AlreadyBeingCreatedException
+ inuse = true;
+ } else {
+ consoleMessage("Unexpected error:" + eCreate.getMessage());
+ }
+ } catch (FileAlreadyExistsException eCreateNormal) {
+ // Otherwise, throw FileAlreadyExistsException, which means the
file owner is dead
+ removable = true;
+ }
}
if (inuse) {
// Cannot open the lock file for writing, must be held by a live
process
@@ -179,6 +186,7 @@ public class ClearDanglingScratchDir implements Runnable {
return;
}
consoleMessage("Removing " + scratchDirToRemove.size() + " scratch
directories");
+ String localTmpDir = HiveConf.getVar(conf,
HiveConf.ConfVars.LOCALSCRATCHDIR);
for (Path scratchDir : scratchDirToRemove) {
if (dryRun) {
System.out.println(scratchDir);
@@ -192,6 +200,8 @@ public class ClearDanglingScratchDir implements Runnable {
consoleMessage(message);
}
}
+ // cleaning up on local file system as well
+ removeLocalTmpFiles(scratchDir.getName(), localTmpDir);
}
}
} catch (IOException e) {
@@ -236,4 +246,29 @@ public class ClearDanglingScratchDir implements Runnable {
return result;
}
+
+ /**
+ * While deleting dangling scratch dirs from hdfs, we can clean
corresponding local files as well
+ * @param sessionName prefix to determine removable tmp files
+ * @param localTmpdir local tmp file location
+ */
+ private void removeLocalTmpFiles(String sessionName, String localTmpdir) {
+ File[] files = new File(localTmpdir).listFiles(fn ->
fn.getName().startsWith(sessionName));
+ boolean success;
+ if (files != null) {
+ for (File file : files) {
+ success = false;
+ if (file.canWrite()) {
+ success = file.delete();
+ }
+ if (success) {
+ consoleMessage("While removing '" + sessionName + "' dangling
scratch dir from HDFS, "
+ + "local tmp session file '" + file.getPath() + "' has been
cleaned as well.");
+ } else if (file.getName().startsWith(sessionName)) {
+ consoleMessage("Even though '" + sessionName + "' is marked as
dangling session dir, "
+ + "local tmp session file '" + file.getPath() + "' could not
be removed.");
+ }
+ }
+ }
+ }
}
\ No newline at end of file