This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 327a91d7d9 [SYSTEMDS-3816] Fix missing removal of list crc files in
local FS
327a91d7d9 is described below
commit 327a91d7d9d38ae99d1461b29c6c03adc46b8e36
Author: Matthias Boehm <[email protected]>
AuthorDate: Thu Jan 16 07:15:01 2025 +0100
[SYSTEMDS-3816] Fix missing removal of list crc files in local FS
The list writer was broken a while ago, and no longer properly
removed the crc files of the list directory and individual files in
the local file system.
---
src/main/java/org/apache/sysds/runtime/io/ListWriter.java | 11 ++++++++++-
.../org/apache/sysds/test/functions/io/ReadWriteListTest.java | 8 ++++----
2 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/src/main/java/org/apache/sysds/runtime/io/ListWriter.java
b/src/main/java/org/apache/sysds/runtime/io/ListWriter.java
index b6fabb6d40..15022da0ce 100644
--- a/src/main/java/org/apache/sysds/runtime/io/ListWriter.java
+++ b/src/main/java/org/apache/sysds/runtime/io/ListWriter.java
@@ -19,7 +19,10 @@
package org.apache.sysds.runtime.io;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
import org.apache.sysds.common.Types.FileFormat;
+import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.conf.DMLConfig;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.controlprogram.caching.CacheableData;
@@ -52,6 +55,7 @@ public class ListWriter
try {
//write basic list meta data
+ JobConf job = new
JobConf(ConfigurationManager.getCachedJobConf());
HDFSTool.writeMetaDataFile(fname + ".mtd",
lo.getValueType(), null,
lo.getDataType(), dc,
FileFormat.safeValueOf(fmtStr), props);
@@ -64,13 +68,18 @@ public class ListWriter
for(int i=0; i<lo.getLength(); i++) {
Data dat = lo.getData(i);
String lfname = fname
+"/"+i+"_"+(lo.isNamedList()?lo.getName(i):"null");
- if( dat instanceof CacheableData<?> )
+ if( dat instanceof CacheableData<?> ) {
((CacheableData<?>)dat).exportData(lfname, fmtStr, props);
+
IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(job, new Path(lfname));
+ }
else if( dat instanceof ListObject )
writeListToHDFS((ListObject)dat,
lfname, fmtStr, props);
else //scalar
HDFSTool.writeScalarToHDFS((ScalarObject)dat, lfname);
}
+
+ //remove crc file of list directory
+ IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(job,
new Path(fname));
}
catch(Exception ex) {
throw new DMLRuntimeException(
diff --git
a/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java
b/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java
index 3bb4ba3050..7894c6da8b 100644
--- a/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java
@@ -111,10 +111,10 @@ public class ReadWriteListTest extends AutomatedTestBase {
double val1 =
HDFSTool.readDoubleFromHDFSFile(output("R1"));
//check no crc files
- // I have removed this check since i modified the
removal of .crc files to a remove on close
- // File[] files = new File(output("L")).listFiles();
- // LOG.error(Arrays.toString(files));
- // Assert.assertFalse(Arrays.stream(files).anyMatch(f
-> f.getName().endsWith(".crc")));
+			//disabled due to delete on exit, but kept for temporary
validation via delete
+ //File[] files = new File(output("L")).listFiles();
+ //LOG.error(Arrays.toString(files));
+ //Assert.assertFalse(Arrays.stream(files).anyMatch(f ->
f.getName().endsWith(".crc")));
//run read
fullDMLScriptName = HOME + TEST_NAME2 + ".dml";