This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new a96a76d  [SYSTEMDS-3232] Fix list writer missing delete of crc files on local fs
a96a76d is described below

commit a96a76d1dbe4a86db957f87d8c774ff2c474f131
Author: Matthias Boehm <mboe...@gmail.com>
AuthorDate: Wed Dec 29 19:10:29 2021 +0100

    [SYSTEMDS-3232] Fix list writer missing delete of crc files on local fs
    
    This patch consolidates the code paths for writing scalars and writing
    scalars in lists to consistently remove crc files on the local file
    system.
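
For context: Hadoop's LocalFileSystem is a ChecksumFileSystem, so every file
it creates gets a hidden .crc sidecar (e.g., .R1.crc next to R1). That is why
local writes leave stray checksum files unless they are deleted explicitly.
A minimal standalone sketch of the effect and the cleanup idea, assuming only
the Hadoop client API (class name and /tmp path are hypothetical, not part of
this patch):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.LocalFileSystem;
    import org.apache.hadoop.fs.Path;

    public class CrcSidecarDemo {
      public static void main(String[] args) throws IOException {
        // LocalFileSystem extends ChecksumFileSystem: writes get .crc sidecars
        LocalFileSystem fs = FileSystem.getLocal(new Configuration());
        Path file = new Path("/tmp/R1"); // hypothetical output path
        try (FSDataOutputStream out = fs.create(file, true)) {
          out.writeBytes("7.0");
        }
        // a hidden /tmp/.R1.crc sidecar now exists next to the file;
        // remove it through the raw (non-checksummed) file system
        Path crc = fs.getChecksumFile(file);
        if (fs.getRawFileSystem().exists(crc))
          fs.getRawFileSystem().delete(crc, false);
      }
    }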
---
 .../instructions/cp/VariableCPInstruction.java     | 29 +---------------------
 .../org/apache/sysds/runtime/io/ListWriter.java    |  8 ++----
 .../org/apache/sysds/runtime/util/HDFSTool.java    | 24 ++++++++++++++++++
 .../sysds/test/functions/io/ReadWriteListTest.java |  8 ++++--
 4 files changed, 33 insertions(+), 36 deletions(-)

diff --git a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
index 3965656..c579c96 100644
--- a/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
+++ b/src/main/java/org/apache/sysds/runtime/instructions/cp/VariableCPInstruction.java
@@ -26,9 +26,6 @@ import java.util.List;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang3.tuple.Pair;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.LocalFileSystem;
-import org.apache.hadoop.fs.Path;
 import org.apache.sysds.api.DMLScript;
 import org.apache.sysds.common.Types.DataType;
 import org.apache.sysds.common.Types.FileFormat;
@@ -52,7 +49,6 @@ import org.apache.sysds.runtime.io.FileFormatProperties;
 import org.apache.sysds.runtime.io.FileFormatPropertiesCSV;
 import org.apache.sysds.runtime.io.FileFormatPropertiesLIBSVM;
 import org.apache.sysds.runtime.io.FileFormatPropertiesHDF5;
-import org.apache.sysds.runtime.io.IOUtilFunctions;
 import org.apache.sysds.runtime.io.ListReader;
 import org.apache.sysds.runtime.io.ListWriter;
 import org.apache.sysds.runtime.io.WriterMatrixMarket;
@@ -990,7 +986,7 @@ public class VariableCPInstruction extends CPInstruction implements LineageTrace
                }
 
                if( getInput1().getDataType() == DataType.SCALAR ) {
-                       writeScalarToHDFS(ec, fname);
+                       HDFSTool.writeScalarToHDFS(ec.getScalarInput(getInput1()), fname);
                }
                else if( getInput1().getDataType() == DataType.MATRIX ) {
                        if( fmt == FileFormat.MM )
@@ -1194,29 +1190,6 @@ public class VariableCPInstruction extends CPInstruction implements LineageTrace
                }
        }
 
-       /**
-        * Helper function to write scalars to HDFS based on its value type.
-        *
-        * @param ec execution context
-        * @param fname file name
-        */
-       private void writeScalarToHDFS(ExecutionContext ec, String fname) {
-               try {
-                       ScalarObject scalar = ec.getScalarInput(getInput1());
-                       HDFSTool.writeObjectToHDFS(scalar.getValue(), fname);
-                       HDFSTool.writeScalarMetaDataFile(fname +".mtd", getInput1().getValueType(), scalar.getPrivacyConstraint());
-
-                       FileSystem fs = IOUtilFunctions.getFileSystem(fname);
-                       if (fs instanceof LocalFileSystem) {
-                               Path path = new Path(fname);
-                               IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
-                       }
-               }
-               catch ( IOException e ) {
-                       throw new DMLRuntimeException(e);
-               }
-       }
-
        private static void cleanDataOnHDFS(MatrixObject mo) {
                try {
                        String fpath = mo.getFileName();
diff --git a/src/main/java/org/apache/sysds/runtime/io/ListWriter.java b/src/main/java/org/apache/sysds/runtime/io/ListWriter.java
index 352430a..6ab9363 100644
--- a/src/main/java/org/apache/sysds/runtime/io/ListWriter.java
+++ b/src/main/java/org/apache/sysds/runtime/io/ListWriter.java
@@ -69,12 +69,8 @@ public class ListWriter
                                        ((CacheableData<?>)dat).exportData(lfname, fmtStr, props);
                                else if( dat instanceof ListObject )
                                        writeListToHDFS((ListObject)dat, lfname, fmtStr, props);
-                               else { //scalar
-                                       ScalarObject so = (ScalarObject) dat;
-                                       HDFSTool.writeObjectToHDFS(so.getValue(), lfname);
-                                       HDFSTool.writeScalarMetaDataFile(lfname +".mtd",
-                                               so.getValueType(), so.getPrivacyConstraint());
-                               }
+                               else //scalar
+                                       HDFSTool.writeScalarToHDFS((ScalarObject)dat, lfname);
                        }
                }
                catch(Exception ex) {
diff --git a/src/main/java/org/apache/sysds/runtime/util/HDFSTool.java b/src/main/java/org/apache/sysds/runtime/util/HDFSTool.java
index 9ff4c84..a16f323 100644
--- a/src/main/java/org/apache/sysds/runtime/util/HDFSTool.java
+++ b/src/main/java/org/apache/sysds/runtime/util/HDFSTool.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.io.IOUtils;
@@ -338,6 +339,29 @@ public class HDFSTool
                return br;
        }
        
+       /**
+        * Helper function to write a scalar to HDFS, including writing its
+        * metadata file and removing CRC files on the local file system.
+        *
+        * @param scalar scalar data object
+        * @param fname file name
+        */
+       public static void writeScalarToHDFS(ScalarObject scalar, String fname) {
+               try {
+                       writeObjectToHDFS(scalar.getValue(), fname);
+                       writeScalarMetaDataFile(fname +".mtd", scalar.getValueType(), scalar.getPrivacyConstraint());
+
+                       FileSystem fs = IOUtilFunctions.getFileSystem(fname);
+                       if (fs instanceof LocalFileSystem) {
+                               Path path = new Path(fname);
+                               IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
+                       }
+               }
+               catch ( IOException e ) {
+                       throw new DMLRuntimeException(e);
+               }
+       }
+       
        public static void writeDoubleToHDFS ( double d, String filename ) throws IOException {
                writeObjectToHDFS(d, filename);
        }
diff --git a/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java b/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java
index 68f6031..ca8145b 100644
--- a/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/io/ReadWriteListTest.java
@@ -22,7 +22,9 @@ package org.apache.sysds.test.functions.io;
 import org.junit.Assert;
 import org.junit.Test;
 
+import java.io.File;
 import java.io.IOException;
+import java.util.Arrays;
 
 import org.apache.sysds.common.Types.ExecMode;
 import org.apache.sysds.common.Types.FileFormat;
@@ -104,14 +106,16 @@ public class ReadWriteListTest extends AutomatedTestBase
                        fullDMLScriptName = HOME + TEST_NAME1 + ".dml";
                        programArgs = new String[]{"-args", String.valueOf(rows),
                                String.valueOf(cols), output("R1"), output("L"), format.toString(), String.valueOf(named)};
-                       
                        runTest(true, false, null, -1);
                        double val1 = HDFSTool.readDoubleFromHDFSFile(output("R1"));
                        
+                       //check no crc files
+                       File[] files = new File(output("L")).listFiles();
+                       Assert.assertFalse(Arrays.stream(files).anyMatch(f -> f.getName().endsWith(".crc")));
+                       
                        //run read
                        fullDMLScriptName = HOME + TEST_NAME2 + ".dml";
                        programArgs = new String[]{"-args", output("L"), output("R2")};
-                       
                        runTest(true, false, null, -1);
                        double val2 = HDFSTool.readDoubleFromHDFSFile(output("R2"));
                        

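The consolidated HDFSTool.writeScalarToHDFS is now the single entry point for
both direct scalar writes and scalars inside lists, so both paths get the same
.mtd and CRC handling. A usage sketch under that assumption (class name and
path are hypothetical; DoubleObject is one concrete ScalarObject):

    import org.apache.sysds.runtime.instructions.cp.DoubleObject;
    import org.apache.sysds.runtime.util.HDFSTool;

    public class WriteScalarExample {
      public static void main(String[] args) {
        // writes /tmp/R1 plus /tmp/R1.mtd; on a local file system the helper
        // also removes the /tmp/.R1.crc sidecar afterwards
        HDFSTool.writeScalarToHDFS(new DoubleObject(7.0), "/tmp/R1");
      }
    }

An alternative design would suppress checksums up front via
FileSystem.setWriteChecksum(false) instead of deleting .crc files after the
write; this patch keeps the delete-after-write approach, shared by both call
sites.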