This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push: new a96a76d [SYSTEMDS-3232] Fix list writer missing delete of crc files on local fs a96a76d is described below commit a96a76d1dbe4a86db957f87d8c774ff2c474f131 Author: Matthias Boehm <mboe...@gmail.com> AuthorDate: Wed Dec 29 19:10:29 2021 +0100 [SYSTEMDS-3232] Fix list writer missing delete of crc files on local fs This patch consolidates the code paths for writing scalars and writing scalars in lists to consistently remove crc files on local file system. --- .../instructions/cp/VariableCPInstruction.java | 29 +--------------------- .../org/apache/sysds/runtime/io/ListWriter.java | 8 ++---- .../org/apache/sysds/runtime/util/HDFSTool.java | 24 ++++++++++++++++++ .../sysds/test/functions/io/ReadWriteListTest.java | 8 ++++-- 4 files changed, 33 insertions(+), 36 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java index 3965656..c579c96 100644 --- a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java +++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java @@ -26,9 +26,6 @@ import java.util.List; import org.apache.commons.lang.StringUtils; import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.Path; import org.apache.sysds.api.DMLScript; import org.apache.sysds.common.Types.DataType; import org.apache.sysds.common.Types.FileFormat; @@ -52,7 +49,6 @@ import org.apache.sysds.runtime.io.FileFormatProperties; import org.apache.sysds.runtime.io.FileFormatPropertiesCSV; import org.apache.sysds.runtime.io.FileFormatPropertiesLIBSVM; import org.apache.sysds.runtime.io.FileFormatPropertiesHDF5; -import org.apache.sysds.runtime.io.IOUtilFunctions; import org.apache.sysds.runtime.io.ListReader; import org.apache.sysds.runtime.io.ListWriter; import org.apache.sysds.runtime.io.WriterMatrixMarket; @@ -990,7 +986,7 @@ public class VariableCPInstruction extends CPInstruction implements LineageTrace } if( getInput1().getDataType() == DataType.SCALAR ) { - writeScalarToHDFS(ec, fname); + HDFSTool.writeScalarToHDFS(ec.getScalarInput(getInput1()), fname); } else if( getInput1().getDataType() == DataType.MATRIX ) { if( fmt == FileFormat.MM ) @@ -1194,29 +1190,6 @@ public class VariableCPInstruction extends CPInstruction implements LineageTrace } } - /** - * Helper function to write scalars to HDFS based on its value type. - * - * @param ec execution context - * @param fname file name - */ - private void writeScalarToHDFS(ExecutionContext ec, String fname) { - try { - ScalarObject scalar = ec.getScalarInput(getInput1()); - HDFSTool.writeObjectToHDFS(scalar.getValue(), fname); - HDFSTool.writeScalarMetaDataFile(fname +".mtd", getInput1().getValueType(), scalar.getPrivacyConstraint()); - - FileSystem fs = IOUtilFunctions.getFileSystem(fname); - if (fs instanceof LocalFileSystem) { - Path path = new Path(fname); - IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path); - } - } - catch ( IOException e ) { - throw new DMLRuntimeException(e); - } - } - private static void cleanDataOnHDFS(MatrixObject mo) { try { String fpath = mo.getFileName(); diff --git a/src/main/java/org/apache/sysds/runtime/io/ListWriter.java b/src/main/java/org/apache/sysds/runtime/io/ListWriter.java index 352430a..6ab9363 100644 --- a/src/main/java/org/apache/sysds/runtime/io/ListWriter.java +++ b/src/main/java/org/apache/sysds/runtime/io/ListWriter.java @@ -69,12 +69,8 @@ public class ListWriter ((CacheableData<?>)dat).exportData(lfname, fmtStr, props); else if( dat instanceof ListObject ) writeListToHDFS((ListObject)dat, lfname, fmtStr, props); - else { //scalar - ScalarObject so = (ScalarObject) dat; - HDFSTool.writeObjectToHDFS(so.getValue(), lfname); - HDFSTool.writeScalarMetaDataFile(lfname +".mtd", - so.getValueType(), so.getPrivacyConstraint()); - } + else //scalar + HDFSTool.writeScalarToHDFS((ScalarObject)dat, lfname); } } catch(Exception ex) { diff --git a/src/main/java/org/apache/sysds/runtime/util/HDFSTool.java b/src/main/java/org/apache/sysds/runtime/util/HDFSTool.java index 9ff4c84..a16f323 100644 --- a/src/main/java/org/apache/sysds/runtime/util/HDFSTool.java +++ b/src/main/java/org/apache/sysds/runtime/util/HDFSTool.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; +import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.IOUtils; @@ -338,6 +339,29 @@ public class HDFSTool return br; } + /** + * Helper function to write scalars to HDFS, + * including writing its meta data and removing CRC files in local file system + * + * @param scalar scalar data object + * @param fname file name + */ + public static void writeScalarToHDFS(ScalarObject scalar, String fname) { + try { + writeObjectToHDFS(scalar.getValue(), fname); + writeScalarMetaDataFile(fname +".mtd", scalar.getValueType(), scalar.getPrivacyConstraint()); + + FileSystem fs = IOUtilFunctions.getFileSystem(fname); + if (fs instanceof LocalFileSystem) { + Path path = new Path(fname); + IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path); + } + } + catch ( IOException e ) { + throw new DMLRuntimeException(e); + } + } + public static void writeDoubleToHDFS ( double d, String filename ) throws IOException { writeObjectToHDFS(d, filename); } diff --git a/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java b/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java index 68f6031..ca8145b 100644 --- a/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java +++ b/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java @@ -22,7 +22,9 @@ package org.apache.sysds.test.functions.io; import org.junit.Assert; import org.junit.Test; +import java.io.File; import java.io.IOException; +import java.util.Arrays; import org.apache.sysds.common.Types.ExecMode; import org.apache.sysds.common.Types.FileFormat; @@ -104,14 +106,16 @@ public class ReadWriteListTest extends AutomatedTestBase fullDMLScriptName = HOME + TEST_NAME1 + ".dml"; programArgs = new String[]{"-args", String.valueOf(rows), String.valueOf(cols), output("R1"), output("L"), format.toString(), String.valueOf(named)}; - runTest(true, false, null, -1); double val1 = HDFSTool.readDoubleFromHDFSFile(output("R1")); + //check no crc files + File[] files = new File(output("L")).listFiles(); + Assert.assertFalse(Arrays.stream(files).anyMatch(f -> f.getName().endsWith(".crc"))); + //run read fullDMLScriptName = HOME + TEST_NAME2 + ".dml"; programArgs = new String[]{"-args", output("L"), output("R2")}; - runTest(true, false, null, -1); double val2 = HDFSTool.readDoubleFromHDFSFile(output("R2"));