Repository: systemml
Updated Branches:
  refs/heads/master df8d4a63d -> 9389a5e1e


[SYSTEMML-1718] Fix matrix export to non-default dfs file system

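This patch fixes issues with the buffer pool export (which is also used
for persistent writes of in-memory matrices). If the file scheme of the
write target differs from the scratch space or input file scheme, an
export of binary matrices via copy fails because we cannot copy across
file systems. The export now checks for a common file scheme and, if the
schemes differ, writes the matrix to the target instead of copying or
renaming. In addition, IOUtilFunctions.isSameFileScheme no longer throws
a NullPointerException when the first path has no scheme but the second
path does.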


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/0afaa24e
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/0afaa24e
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/0afaa24e

Branch: refs/heads/master
Commit: 0afaa24e3073a39fc4a38bbeefbad5cff65d0366
Parents: df8d4a6
Author: Matthias Boehm <[email protected]>
Authored: Sun Jun 18 14:02:35 2017 -0700
Committer: Matthias Boehm <[email protected]>
Committed: Mon Jun 19 12:38:20 2017 -0700

----------------------------------------------------------------------
 .../controlprogram/caching/CacheableData.java   | 22 +++++++++++---------
 .../sysml/runtime/io/IOUtilFunctions.java       |  2 +-
 2 files changed, 13 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/0afaa24e/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
 
b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
index fd6fa16..8532062 100644
--- 
a/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
+++ 
b/src/main/java/org/apache/sysml/runtime/controlprogram/caching/CacheableData.java
@@ -29,6 +29,7 @@ import java.util.concurrent.atomic.AtomicLong;
 import org.apache.commons.lang.mutable.MutableBoolean;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
 import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM;
 import org.apache.sysml.conf.ConfigurationManager;
@@ -42,6 +43,7 @@ import 
org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUObject;
 import org.apache.sysml.runtime.instructions.spark.data.BroadcastObject;
 import org.apache.sysml.runtime.instructions.spark.data.RDDObject;
+import org.apache.sysml.runtime.io.IOUtilFunctions;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.MatrixDimensionsMetaData;
 import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
@@ -761,18 +763,18 @@ public abstract class CacheableData<T extends CacheBlock> 
extends Data
                     getCache();
             }
         }
-                               
-               boolean pWrite = false; // !fName.equals(_hdfsFileName); 
//persistent write flag
-               if ( fName.equals(_hdfsFileName) ) {
+               
+        //check for persistent or transient writes
+               boolean pWrite = !fName.equals(_hdfsFileName);
+               if( !pWrite )
                        setHDFSFileExists(true);
-                       pWrite = false;
-               }
-               else {
-                       pWrite = true;  // i.e., export is called from "write" 
instruction
-               }
-
+               
+               //check for common file scheme (otherwise no copy/rename)
+               boolean eqScheme = IOUtilFunctions.isSameFileScheme(
+                       new Path(_hdfsFileName), new Path(fName));
+               
                //actual export (note: no direct transfer of local copy in 
order to ensure blocking (and hence, parallelism))
-               if(  isDirty()  ||      //use dirty for skipping parallel 
exports
+               if(  isDirty() || !eqScheme ||
                    (pWrite && !isEqualOutputFormat(outputFormat)) ) 
                {                 
                        // CASE 1: dirty in-mem matrix or pWrite w/ different 
format (write matrix to fname; load into memory if evicted)

http://git-wip-us.apache.org/repos/asf/systemml/blob/0afaa24e/src/main/java/org/apache/sysml/runtime/io/IOUtilFunctions.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/io/IOUtilFunctions.java 
b/src/main/java/org/apache/sysml/runtime/io/IOUtilFunctions.java
index 12b7438..5f0e296 100644
--- a/src/main/java/org/apache/sysml/runtime/io/IOUtilFunctions.java
+++ b/src/main/java/org/apache/sysml/runtime/io/IOUtilFunctions.java
@@ -77,7 +77,7 @@ public class IOUtilFunctions
                String scheme1 = path1.toUri().getScheme();
                String scheme2 = path2.toUri().getScheme();
                return (scheme1 == null && scheme2 == null)
-                       || scheme1.equals(scheme2);
+                       || (scheme1 != null && scheme1.equals(scheme2));
        }
        
        public static boolean isObjectStoreFileScheme(Path path) {

Reply via email to