This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 0f55165 [SYSTEMDS-2905] Add MetaALL
0f55165 is described below
commit 0f551654dadb354951aea918f46a4394dd409cd7
Author: Olga <[email protected]>
AuthorDate: Fri Mar 12 01:17:56 2021 +0100
[SYSTEMDS-2905] Add MetaALL
This commit unifies the metadata handling across our file reading.
Previously, the federated part of the system had custom code for this
that differed from the rest of the system.
- Added MetaDataAll class
- Draft MetaDataAll
- Fixed init JSONObject
- Fixed mtdExists
- Fixed parsing csv with non-default delim/sep
- Fixed privacy tests
- Fixed tests
Closes #1207
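
Illustrative only (not part of the commit): with MetaDataAll, callers read
.mtd metadata through one class instead of parsing JSON by hand. The calls
below are the ones this diff introduces (see Connection.java); the variable
names and the surrounding context are hypothetical.

    String fnamemtd = DataExpression.getMTDFileName(fname); // fname: data file path (assumed)
    MetaDataAll metaObj = new MetaDataAll(fnamemtd, false, true); // (mtdFileName, conditional, parseMeta)
    long rows = metaObj.getDim1();
    long cols = metaObj.getDim2();
    int blen = metaObj.getBlocksize();
    long nnz = metaObj.getNnz();
    FileFormat fmt = metaObj.getFileFormat();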
---
.../java/org/apache/sysds/api/jmlc/Connection.java | 53 ++-
.../apache/sysds/hops/recompile/Recompiler.java | 31 +-
.../org/apache/sysds/parser/DataExpression.java | 214 +-----------
.../federated/FederatedWorkerHandler.java | 34 +-
.../org/apache/sysds/runtime/io/ReaderTextCSV.java | 8 +-
.../sysds/runtime/io/ReaderTextCSVParallel.java | 4 +-
.../sysds/runtime/io/ReaderTextCellParallel.java | 2 +-
.../org/apache/sysds/runtime/meta/MetaDataAll.java | 375 +++++++++++++++++++++
.../org/apache/sysds/test/AutomatedTestBase.java | 46 +--
.../MatrixMultiplicationPropagationTest.java | 12 +-
.../privacy/MatrixRuntimePropagationTest.java | 11 +-
.../test/functions/privacy/PrivacyLineageTest.java | 2 +-
.../test/functions/privacy/ReadWriteTest.java | 19 +-
.../functions/privacy/ScalarPropagationTest.java | 9 +-
14 files changed, 504 insertions(+), 316 deletions(-)
diff --git a/src/main/java/org/apache/sysds/api/jmlc/Connection.java b/src/main/java/org/apache/sysds/api/jmlc/Connection.java
index 44e9114..1c4bada 100644
--- a/src/main/java/org/apache/sysds/api/jmlc/Connection.java
+++ b/src/main/java/org/apache/sysds/api/jmlc/Connection.java
@@ -31,7 +31,7 @@ import java.util.Map;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.wink.json4j.JSONObject;
+import org.apache.sysds.runtime.meta.MetaDataAll;
import org.apache.sysds.api.DMLException;
import org.apache.sysds.api.DMLScript;
import org.apache.sysds.common.Types.ExecMode;
@@ -362,17 +362,14 @@ public class Connection implements Closeable
try {
//read json meta data
String fnamemtd = DataExpression.getMTDFileName(fname);
- JSONObject jmtd = new DataExpression().readMetadataFile(fnamemtd, false);
+ MetaDataAll metaObj = new MetaDataAll(fnamemtd, false, true);
- //parse json meta data
- long rows = jmtd.getLong(DataExpression.READROWPARAM);
- long cols = jmtd.getLong(DataExpression.READCOLPARAM);
- int blen = jmtd.containsKey(DataExpression.ROWBLOCKCOUNTPARAM)? jmtd.getInt(DataExpression.ROWBLOCKCOUNTPARAM) : -1;
- long nnz = jmtd.containsKey(DataExpression.READNNZPARAM)? jmtd.getLong(DataExpression.READNNZPARAM) : -1;
- String format = jmtd.getString(DataExpression.FORMAT_TYPE);
- FileFormat fmt = FileFormat.safeValueOf(format);
+ //parse meta data
+ long rows = metaObj.getDim1();
+ long cols = metaObj.getDim2();
+ int blen = metaObj.getBlocksize();
+ long nnz = metaObj.getNnz();
+ FileFormat fmt = metaObj.getFileFormat();
//read matrix file
return readDoubleMatrix(fname, fmt, rows, cols, blen, nnz);
@@ -505,11 +502,11 @@ public class Connection implements Closeable
throws IOException
{
try {
- //parse json meta data
- JSONObject jmtd = new JSONObject(meta);
- int rows = jmtd.getInt(DataExpression.READROWPARAM);
- int cols = jmtd.getInt(DataExpression.READCOLPARAM);
- String format = jmtd.getString(DataExpression.FORMAT_TYPE);
+ //parse meta data
+ MetaDataAll mtd = new MetaDataAll(meta);
+ int rows = (int) mtd.getDim1();
+ int cols = (int) mtd.getDim2();
+ String format = mtd.getFormatTypeString();
//parse the input matrix
return convertToMatrix(input, rows, cols, format);
@@ -607,13 +604,13 @@ public class Connection implements Closeable
try {
//read json meta data
String fnamemtd = DataExpression.getMTDFileName(fname);
- JSONObject jmtd = new DataExpression().readMetadataFile(fnamemtd, false);
-
- //parse json meta data
- long rows = jmtd.getLong(DataExpression.READROWPARAM);
- long cols = jmtd.getLong(DataExpression.READCOLPARAM);
- String format = jmtd.getString(DataExpression.FORMAT_TYPE);
- FileFormat fmt = FileFormat.safeValueOf(format);
+
+ MetaDataAll metaObj = new MetaDataAll(fnamemtd, false, true);
+
+ //parse meta data
+ long rows = metaObj.getDim1();
+ long cols = metaObj.getDim2();
+ FileFormat fmt = metaObj.getFileFormat();
//read frame file
return readStringFrame(fname, fmt, rows, cols);
@@ -744,11 +741,11 @@ public class Connection implements Closeable
throws IOException
{
try {
- //parse json meta data
- JSONObject jmtd = new JSONObject(meta);
- int rows = jmtd.getInt(DataExpression.READROWPARAM);
- int cols = jmtd.getInt(DataExpression.READCOLPARAM);
- String format = jmtd.getString(DataExpression.FORMAT_TYPE);
+ //parse meta data
+ MetaDataAll mtd = new MetaDataAll(meta);
+ int rows = (int) mtd.getDim1();
+ int cols = (int) mtd.getDim2();
+ String format = mtd.getFormatTypeString();
//parse the input frame
return convertToFrame(input, rows, cols, format);
diff --git a/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java b/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java
index 757c3eb..ab26137 100644
--- a/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java
+++ b/src/main/java/org/apache/sysds/hops/recompile/Recompiler.java
@@ -19,9 +19,17 @@
package org.apache.sysds.hops.recompile;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.wink.json4j.JSONObject;
import org.apache.sysds.api.DMLScript;
import org.apache.sysds.api.jmlc.JMLCUtils;
import org.apache.sysds.common.Types.DataType;
@@ -85,22 +93,13 @@ import org.apache.sysds.runtime.io.IOUtilFunctions;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.meta.DataCharacteristics;
import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.runtime.meta.MetaDataAll;
import org.apache.sysds.runtime.meta.MetaDataFormat;
import org.apache.sysds.runtime.util.HDFSTool;
import org.apache.sysds.runtime.util.ProgramConverter;
import org.apache.sysds.runtime.util.UtilFunctions;
import org.apache.sysds.utils.Explain;
import org.apache.sysds.utils.Explain.ExplainType;
-import org.apache.sysds.utils.JSONHelper;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
/**
* Dynamic recompilation of hop dags to runtime instructions, which includes the
@@ -1600,13 +1599,13 @@ public class Recompiler
FileSystem fs = IOUtilFunctions.getFileSystem(mtdname); //no auto-close
if( fs.exists(path) ){
try(BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)))) {
- JSONObject mtd = JSONHelper.parse(br);
- DataType dt = DataType.valueOf(String.valueOf(mtd.get(DataExpression.DATATYPEPARAM)).toUpperCase());
+ MetaDataAll mtd = new MetaDataAll(br);
+ DataType dt = mtd.getDataType();
dop.setDataType(dt);
if(dt != DataType.FRAME)
- dop.setValueType(ValueType.valueOf(String.valueOf(mtd.get(DataExpression.VALUETYPEPARAM)).toUpperCase()));
- dop.setDim1((dt==DataType.MATRIX||dt==DataType.FRAME)?Long.parseLong(mtd.get(DataExpression.READROWPARAM).toString()):0);
- dop.setDim2((dt==DataType.MATRIX||dt==DataType.FRAME)?Long.parseLong(mtd.get(DataExpression.READCOLPARAM).toString()):0);
+ dop.setValueType(mtd.getValueType());
+ dop.setDim1((dt==DataType.MATRIX||dt==DataType.FRAME)? mtd.getDim1():0);
+ dop.setDim2((dt==DataType.MATRIX||dt==DataType.FRAME)? mtd.getDim2():0);
}
}
}
diff --git a/src/main/java/org/apache/sysds/parser/DataExpression.java b/src/main/java/org/apache/sysds/parser/DataExpression.java
index dcea873..2db8869 100644
--- a/src/main/java/org/apache/sysds/parser/DataExpression.java
+++ b/src/main/java/org/apache/sysds/parser/DataExpression.java
@@ -19,11 +19,6 @@
package org.apache.sysds.parser;
-import static org.apache.sysds.runtime.instructions.fed.InitFEDInstruction.FED_FRAME_IDENTIFIER;
-import static org.apache.sysds.runtime.instructions.fed.InitFEDInstruction.FED_MATRIX_IDENTIFIER;
-
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -32,13 +27,12 @@ import java.util.List;
import java.util.Map.Entry;
import java.util.Set;
+import static org.apache.sysds.runtime.instructions.fed.InitFEDInstruction.FED_FRAME_IDENTIFIER;
+import static org.apache.sysds.runtime.instructions.fed.InitFEDInstruction.FED_MATRIX_IDENTIFIER;
import org.antlr.v4.runtime.ParserRuleContext;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
-import org.apache.wink.json4j.JSONArray;
-import org.apache.wink.json4j.JSONObject;
import org.apache.sysds.api.DMLScript;
import org.apache.sysds.common.Types.DataType;
import org.apache.sysds.common.Types.FileFormat;
@@ -52,12 +46,12 @@ import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
import org.apache.sysds.runtime.io.FileFormatPropertiesMM;
import org.apache.sysds.runtime.io.IOUtilFunctions;
+import org.apache.sysds.runtime.meta.MetaDataAll;
import org.apache.sysds.runtime.privacy.PrivacyConstraint;
import org.apache.sysds.runtime.privacy.PrivacyConstraint.PrivacyLevel;
import org.apache.sysds.runtime.privacy.PrivacyUtils;
import org.apache.sysds.runtime.util.HDFSTool;
import org.apache.sysds.runtime.util.UtilFunctions;
-import org.apache.sysds.utils.JSONHelper;
public class DataExpression extends DataIdentifier
{
@@ -937,8 +931,8 @@ public class DataExpression extends DataIdentifier
toString() + ". Only " +
VALUETYPEPARAM + " is allowed.", conditional,
LanguageErrorCodes.INVALID_PARAMETERS);
}
}
-
- JSONObject configObject = null;
+
+ MetaDataAll configObj = new MetaDataAll();
// Process expressions in input filename
String inputFileName = getInputFileName(currConstVars, conditional);
@@ -969,9 +963,10 @@ public class DataExpression extends DataIdentifier
// check if file is matrix market format
if (formatTypeString == null && shouldReadMTD){
- if ( checkHasMatrixMarketFormat(inputFileName, mtdFileName, conditional) ) {
+ if ( MetaDataAll.checkHasMatrixMarketFormat(inputFileName, mtdFileName, conditional) ) {
formatTypeString = FileFormat.MM.toString();
addVarParam(FORMAT_TYPE, new StringIdentifier(formatTypeString, this));
+ configObj.setFormatTypeString(formatTypeString);
inferredFormatType = true;
shouldReadMTD = false;
}
@@ -979,9 +974,10 @@ public class DataExpression extends DataIdentifier
// check if file is delimited format
if (formatTypeString == null && shouldReadMTD ) {
- formatTypeString = checkHasDelimitedFormat(inputFileName, conditional);
+ formatTypeString = MetaDataAll.checkHasDelimitedFormat(inputFileName, conditional);
if (formatTypeString != null) {
addVarParam(FORMAT_TYPE, new StringIdentifier(formatTypeString, this));
+ configObj.setFormatTypeString(formatTypeString);
inferredFormatType = true;
}
}
@@ -1034,7 +1030,6 @@ public class DataExpression extends DataIdentifier
raiseValidateError("MM
file: invalid specified number of rows: "+rowsCount2+" vs "+rowsCount);
}
addVarParam(READROWPARAM, new
IntIdentifier(rowsCount, this));
-
long colsCount =
Long.parseLong(sizeInfo[1]);
if (colsCount < 0)
@@ -1045,7 +1040,8 @@ public class DataExpression extends DataIdentifier
raiseValidateError("MM
file: invalid specified number of columns: "+colsCount2+" vs "+colsCount);
}
addVarParam(READCOLPARAM, new
IntIdentifier(colsCount, this));
-
+ configObj.setDimensions(rowsCount,
colsCount);
+
long nnzCount =
Long.parseLong(sizeInfo[2]) * (props.isSymmetric() ? 2 : 1);
if (nnzCount < 0)
raiseValidateError("MM file:
invalid number of non-zeros: "+nnzCount);
@@ -1055,16 +1051,16 @@ public class DataExpression extends DataIdentifier
raiseValidateError("MM
file: invalid specified number of non-zeros: "+nnzCount2+" vs "+nnzCount);
}
addVarParam(READNNZPARAM, new
IntIdentifier(nnzCount, this));
+ configObj.setNnz(nnzCount);
}
}
boolean isCSV = (formatTypeString != null && formatTypeString.equalsIgnoreCase(FileFormat.CSV.toString()));
if (shouldReadMTD){
- configObject = readMetadataFile(mtdFileName, conditional);
- // if the MTD file exists, check the values specified in read statement match values in metadata MTD file
- if (configObject != null){
- parseMetaDataFileParameters(mtdFileName, configObject, conditional);
+ configObj = new MetaDataAll(mtdFileName, conditional, false);
+ if (configObj.mtdExists()){
+ _varParams = configObj.parseMetaDataFileParameters(mtdFileName, conditional, _varParams);
inferredFormatType = true;
}
else {
@@ -1090,7 +1086,7 @@ public class DataExpression extends DataIdentifier
}
}
}
-
+
// DEFAULT for "sep" : ","
if (getVarParam(DELIM_DELIMITER) == null) {
addVarParam(DELIM_DELIMITER, new StringIdentifier(DEFAULT_DELIM_DELIMITER, this));
@@ -2300,184 +2296,6 @@ public class DataExpression extends DataIdentifier
return result;
}
- @SuppressWarnings("unchecked")
- private void parseMetaDataFileParameters(String mtdFileName, JSONObject configObject, boolean conditional)
- {
- for( Object obj : configObject.entrySet() ){
- Entry<Object,Object> e = (Entry<Object, Object>) obj;
- Object key = e.getKey();
- Object val = e.getValue();
-
- boolean isValidName = READ_VALID_MTD_PARAM_NAMES.contains(key);
-
- if (!isValidName){ //wrong parameters always rejected
- raiseValidateError("MTD file " + mtdFileName + " contains invalid parameter name: " + key, false);
- }
-
- // if the read method parameter is a constant, then verify value matches MTD metadata file
- if (getVarParam(key.toString()) != null && (getVarParam(key.toString()) instanceof ConstIdentifier)
- && !getVarParam(key.toString()).toString().equalsIgnoreCase(val.toString())) {
- raiseValidateError("Parameter '" + key.toString()
- + "' has conflicting values in metadata and read statement. MTD file value: '"
- + val.toString() + "'. Read statement value: '" + getVarParam(key.toString()) + "'.", conditional);
- } else {
- // if the read method does not specify parameter value, then add MTD metadata file value to parameter list
- if (getVarParam(key.toString()) == null){
- if (( !key.toString().equalsIgnoreCase(DESCRIPTIONPARAM) ) &&
- ( !key.toString().equalsIgnoreCase(AUTHORPARAM) ) &&
- ( !key.toString().equalsIgnoreCase(CREATEDPARAM) ) )
- {
- StringIdentifier strId = new StringIdentifier(val.toString(), this);
-
- if ( key.toString().equalsIgnoreCase(DELIM_HAS_HEADER_ROW)
- || key.toString().equalsIgnoreCase(DELIM_FILL)
- || key.toString().equalsIgnoreCase(DELIM_SPARSE)
- ) {
- // parse these parameters as boolean values
- BooleanIdentifier boolId = null;
- if (strId.toString().equalsIgnoreCase("true")) {
- boolId = new BooleanIdentifier(true, this);
- } else if (strId.toString().equalsIgnoreCase("false")) {
- boolId = new BooleanIdentifier(false, this);
- } else {
- raiseValidateError("Invalid value provided for '" + DELIM_HAS_HEADER_ROW + "' in metadata file '" + mtdFileName + "'. " + "Must be either TRUE or FALSE.", conditional);
- }
- removeVarParam(key.toString());
- addVarParam(key.toString(), boolId);
- }
- else if ( key.toString().equalsIgnoreCase(DELIM_FILL_VALUE)) {
- // parse these parameters as numeric values
- DoubleIdentifier doubleId = new DoubleIdentifier(Double.parseDouble(strId.toString()), this);
- removeVarParam(key.toString());
- addVarParam(key.toString(), doubleId);
- }
- else if (key.toString().equalsIgnoreCase(DELIM_NA_STRINGS)
- || key.toString().equalsIgnoreCase(PRIVACY)
- || key.toString().equalsIgnoreCase(FINE_GRAINED_PRIVACY)) {
- String naStrings = null;
- if ( val instanceof String) {
- naStrings = val.toString();
- }
- else if (val instanceof JSONArray) {
- StringBuilder sb = new StringBuilder();
- JSONArray valarr = (JSONArray)val;
- for(int naid=0; naid < valarr.size(); naid++ ) {
- sb.append( (String) valarr.get(naid) );
- if ( naid < valarr.size()-1)
- sb.append( DELIM_NA_STRING_SEP );
- }
- naStrings = sb.toString();
- }
- else if ( val instanceof JSONObject ){
- JSONObject valJsonObject = (JSONObject)val;
- naStrings = valJsonObject.toString();
- }
- else {
- throw new ParseException("Type of value " + val + " from metadata not recognized by parser.");
- }
- StringIdentifier sid = new StringIdentifier(naStrings, this);
- removeVarParam(key.toString());
- addVarParam(key.toString(), sid);
- }
- else {
- // by default, treat a parameter as a string
- addVarParam(key.toString(), strId);
- }
- }
- }
- }
- }
- }
-
- public JSONObject readMetadataFile(String filename, boolean conditional)
- {
- JSONObject retVal = null;
- boolean exists = HDFSTool.existsFileOnHDFS(filename);
- boolean isDir = HDFSTool.isDirectory(filename);
-
- // CASE: filename is a directory -- process as a directory
- if( exists && isDir )
- {
- retVal = new JSONObject();
- for(FileStatus stat : HDFSTool.getDirectoryListing(filename)) {
- Path childPath = stat.getPath(); // gives directory name
- if( !childPath.getName().startsWith("part") )
- continue;
- try (BufferedReader br = new BufferedReader(new InputStreamReader(
- IOUtilFunctions.getFileSystem(childPath).open(childPath))))
- {
- JSONObject childObj = JSONHelper.parse(br);
- for( Object obj : childObj.entrySet() ){
- @SuppressWarnings("unchecked")
- Entry<Object,Object> e = (Entry<Object, Object>) obj;
- Object key = e.getKey();
- Object val = e.getValue();
- retVal.put(key, val);
- }
- }
- catch(Exception e){
- raiseValidateError("for MTD file in directory, error parting part of MTD file with path " + childPath.toString() + ": " + e.getMessage(), conditional);
- }
- }
- }
- // CASE: filename points to a file
- else if (exists) {
- Path path = new Path(filename);
- try (BufferedReader br = new BufferedReader(new InputStreamReader(
- IOUtilFunctions.getFileSystem(path).open(path))))
- {
- retVal = new JSONObject(br);
- }
- catch (Exception e){
- raiseValidateError("error parsing MTD file with path " + filename + ": " + e.getMessage(), conditional);
- }
- }
-
- return retVal;
- }
-
- public boolean checkHasMatrixMarketFormat(String inputFileName, String mtdFileName, boolean conditional)
- {
- // Check the MTD file exists. if there is an MTD file, return false.
- JSONObject mtdObject = readMetadataFile(mtdFileName, conditional);
- if (mtdObject != null)
- return false;
-
- if( HDFSTool.existsFileOnHDFS(inputFileName)
- && !HDFSTool.isDirectory(inputFileName) )
- {
- Path path = new Path(inputFileName);
- try( BufferedReader in = new BufferedReader(new InputStreamReader(
- IOUtilFunctions.getFileSystem(path).open(path))))
- {
- String headerLine = new String("");
- if (in.ready())
- headerLine = in.readLine();
- return (headerLine !=null && headerLine.startsWith("%%"));
- }
- catch(Exception ex) {
- throw new LanguageException("Failed to read matrix market header.", ex);
- }
- }
- return false;
- }
-
- public String checkHasDelimitedFormat(String filename, boolean conditional) {
- // if the MTD file exists, check the format is not binary
- JSONObject mtdObject = readMetadataFile(filename + ".mtd", conditional);
- if (mtdObject != null) {
- String formatTypeString = (String)JSONHelper.get(mtdObject,FORMAT_TYPE);
- if( FileFormat.isDelimitedFormat(formatTypeString) )
- return formatTypeString;
- }
- return null;
- // The file format must be specified either in .mtd file or in read() statement
- // Therefore, one need not actually read the data to infer the format.
- }
-
public boolean isCSVReadWithUnknownSize() {
Expression format = getVarParam(FORMAT_TYPE);
if( _opcode == DataOp.READ && format!=null && format.toString().equalsIgnoreCase(FileFormat.CSV.toString()) ) {
diff --git a/src/main/java/org/apache/sysds/runtime/controlprogram/federated/FederatedWorkerHandler.java b/src/main/java/org/apache/sysds/runtime/controlprogram/federated/FederatedWorkerHandler.java
index 57d5ba3..b0cc075 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/federated/FederatedWorkerHandler.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/federated/FederatedWorkerHandler.java
@@ -23,6 +23,10 @@ import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Arrays;
+import io.netty.channel.ChannelFuture;
+import io.netty.channel.ChannelFutureListener;
+import io.netty.channel.ChannelHandlerContext;
+import io.netty.channel.ChannelInboundHandlerAdapter;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
@@ -54,18 +58,11 @@ import org.apache.sysds.runtime.lineage.LineageCacheConfig.ReuseCacheType;
import org.apache.sysds.runtime.lineage.LineageItem;
import org.apache.sysds.runtime.lineage.LineageItemUtils;
import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.runtime.meta.MetaDataAll;
import org.apache.sysds.runtime.meta.MetaDataFormat;
import org.apache.sysds.runtime.privacy.DMLPrivacyException;
import org.apache.sysds.runtime.privacy.PrivacyMonitor;
-import org.apache.sysds.runtime.privacy.propagation.PrivacyPropagator;
-import org.apache.sysds.utils.JSONHelper;
import org.apache.sysds.utils.Statistics;
-import org.apache.wink.json4j.JSONObject;
-
-import io.netty.channel.ChannelFuture;
-import io.netty.channel.ChannelFutureListener;
-import io.netty.channel.ChannelHandlerContext;
-import io.netty.channel.ChannelInboundHandlerAdapter;
public class FederatedWorkerHandler extends ChannelInboundHandlerAdapter {
protected static Logger log = Logger.getLogger(FederatedWorkerHandler.class);
@@ -195,24 +192,25 @@ public class FederatedWorkerHandler extends ChannelInboundHandlerAdapter {
FileFormat fmt = null;
boolean header = false;
+ String delim = null;
FileSystem fs = null;
+ MetaDataAll mtd;
try {
String mtdname = DataExpression.getMTDFileName(filename);
Path path = new Path(mtdname);
fs = IOUtilFunctions.getFileSystem(mtdname);
try(BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path)))) {
- JSONObject mtd = JSONHelper.parse(br);
- if(mtd == null)
+ mtd = new MetaDataAll(br);
+ if(!mtd.mtdExists())
return new FederatedResponse(ResponseType.ERROR,
new FederatedWorkerHandlerException("Could not parse metadata file"));
- mc.setRows(mtd.getLong(DataExpression.READROWPARAM));
- mc.setCols(mtd.getLong(DataExpression.READCOLPARAM));
- if(mtd.containsKey(DataExpression.READNNZPARAM))
- mc.setNonZeros(mtd.getLong(DataExpression.READNNZPARAM));
- if(mtd.has(DataExpression.DELIM_HAS_HEADER_ROW))
- header = mtd.getBoolean(DataExpression.DELIM_HAS_HEADER_ROW);
- cd = (CacheableData<?>) PrivacyPropagator.parseAndSetPrivacyConstraint(cd, mtd);
- fmt = FileFormat.safeValueOf(mtd.getString(DataExpression.FORMAT_TYPE));
+ mc.setRows(mtd.getDim1());
+ mc.setCols(mtd.getDim2());
+ mc.setNonZeros(mtd.getNnz());
+ header = mtd.getHasHeader();
+ cd = mtd.parseAndSetPrivacyConstraint(cd);
+ fmt = mtd.getFileFormat();
+ delim = mtd.getDelim();
}
}
catch (DMLPrivacyException | FederatedWorkerHandlerException ex){
diff --git a/src/main/java/org/apache/sysds/runtime/io/ReaderTextCSV.java
b/src/main/java/org/apache/sysds/runtime/io/ReaderTextCSV.java
index 63d0b04..53f6037 100644
--- a/src/main/java/org/apache/sysds/runtime/io/ReaderTextCSV.java
+++ b/src/main/java/org/apache/sysds/runtime/io/ReaderTextCSV.java
@@ -58,13 +58,13 @@ public class ReaderTextCSV extends MatrixReader
ret = createOutputMatrixBlock(rlen, clen, (int)rlen, estnnz, true, false);
//prepare file access
- JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
+ JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path( fname );
FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
//check existence and non-empty file
- checkValidInputFile(fs, path);
-
+ checkValidInputFile(fs, path);
+
//core read
ret = readCSVMatrixFromHDFS(path, job, fs, ret, rlen, clen, blen,
_props.hasHeader(), _props.getDelim(), _props.isFill(), _props.getFillValue(), _props.getNAStrings() );
@@ -108,7 +108,7 @@ public class ReaderTextCSV extends MatrixReader
}
else
files.add(path);
-
+
//determine matrix size via additional pass if required
if ( dest == null ) {
dest = computeCSVSize(files, job, fs, hasHeader, delim, fill, fillValue);
diff --git a/src/main/java/org/apache/sysds/runtime/io/ReaderTextCSVParallel.java b/src/main/java/org/apache/sysds/runtime/io/ReaderTextCSVParallel.java
index da1875b..6d879d1 100644
--- a/src/main/java/org/apache/sysds/runtime/io/ReaderTextCSVParallel.java
+++ b/src/main/java/org/apache/sysds/runtime/io/ReaderTextCSVParallel.java
@@ -77,6 +77,7 @@ public class ReaderTextCSVParallel extends MatrixReader {
// prepare file access
_job = new JobConf(ConfigurationManager.getCachedJobConf());
+
Path path = new Path(fname);
FileSystem fs = IOUtilFunctions.getFileSystem(path, _job);
@@ -495,7 +496,8 @@ public class ReaderTextCSVParallel extends MatrixReader {
}
private class CSVReadSparseNoNanTaskAndFill extends CSVReadTask {
- public CSVReadSparseNoNanTaskAndFill(InputSplit split, TextInputFormat informat, MatrixBlock dest, int splitCount) {
+ public CSVReadSparseNoNanTaskAndFill(InputSplit split, TextInputFormat informat, MatrixBlock dest,
+ int splitCount) {
super(split, informat, dest, splitCount);
}
diff --git a/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java b/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java
index f311d03..9c1bc5c 100644
--- a/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java
+++ b/src/main/java/org/apache/sysds/runtime/io/ReaderTextCellParallel.java
@@ -256,7 +256,7 @@ public class ReaderTextCellParallel extends ReaderTextCell
LongWritable key = new LongWritable();
Text value = new Text();
FastStringTokenizer st = new FastStringTokenizer(' ');
-
+
RecordReader<LongWritable,Text> reader = _informat.getRecordReader(_split, _job, Reporter.NULL);
try {
//counting without locking as conflicts unlikely
diff --git a/src/main/java/org/apache/sysds/runtime/meta/MetaDataAll.java b/src/main/java/org/apache/sysds/runtime/meta/MetaDataAll.java
new file mode 100644
index 0000000..df25887
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/meta/MetaDataAll.java
@@ -0,0 +1,375 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.meta;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.lang3.EnumUtils;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.sysds.api.DMLScript;
+import org.apache.sysds.common.Types;
+import org.apache.sysds.parser.BooleanIdentifier;
+import org.apache.sysds.parser.ConstIdentifier;
+import org.apache.sysds.parser.DataExpression;
+import org.apache.sysds.parser.DataIdentifier;
+import org.apache.sysds.parser.DoubleIdentifier;
+import org.apache.sysds.parser.Expression;
+import org.apache.sysds.parser.LanguageException;
+import org.apache.sysds.parser.ParseException;
+import org.apache.sysds.parser.StringIdentifier;
+import org.apache.sysds.runtime.controlprogram.caching.CacheableData;
+import org.apache.sysds.runtime.io.IOUtilFunctions;
+import org.apache.sysds.runtime.privacy.PrivacyConstraint;
+import org.apache.sysds.runtime.privacy.propagation.PrivacyPropagator;
+import org.apache.sysds.runtime.util.HDFSTool;
+import org.apache.sysds.utils.JSONHelper;
+import org.apache.wink.json4j.JSONArray;
+import org.apache.wink.json4j.JSONException;
+import org.apache.wink.json4j.JSONObject;
+
+public class MetaDataAll extends DataIdentifier {
+
+ private JSONObject _metaObj;
+
+ protected String _formatTypeString;
+ protected String _fineGrainedPrivacy;
+ protected String _schema;
+ protected String _delim = DataExpression.DEFAULT_DELIM_DELIMITER;
+ protected boolean _hasHeader = false;
+ protected boolean _sparseDelim = DataExpression.DEFAULT_DELIM_SPARSE;
+
+ public MetaDataAll() {
+ // do nothing
+ }
+
+ public MetaDataAll(String meta) {
+ try {
+ _metaObj = new JSONObject(meta);
+ }
+ catch(JSONException e) {
+ e.printStackTrace();
+ }
+ setPrivacy(PrivacyConstraint.PrivacyLevel.None);
+ parseMetaDataParams();
+ }
+
+ public MetaDataAll(BufferedReader br) {
+ try {
+ _metaObj = JSONHelper.parse(br);
+ }
+ catch(IOException e) {
+ e.printStackTrace();
+ }
+ setPrivacy(PrivacyConstraint.PrivacyLevel.None);
+ parseMetaDataParams();
+ }
+
+ public MetaDataAll(String mtdFileName, boolean conditional, boolean parseMeta) {
+ _metaObj = readMetadataFile(mtdFileName, conditional);
+ setPrivacy(PrivacyConstraint.PrivacyLevel.None);
+ if(parseMeta)
+ parseMetaDataParams();
+ }
+
+ public JSONObject readMetadataFile(String filename, boolean conditional)
+ {
+ JSONObject retVal = new JSONObject();
+ boolean exists = HDFSTool.existsFileOnHDFS(filename);
+ boolean isDir = HDFSTool.isDirectory(filename);
+
+ // CASE: filename is a directory -- process as a directory
+ if( exists && isDir )
+ {
+ for(FileStatus stat : HDFSTool.getDirectoryListing(filename)) {
+ Path childPath = stat.getPath(); // gives directory name
+ if( !childPath.getName().startsWith("part") )
+ continue;
+ try (BufferedReader br = new BufferedReader(new InputStreamReader(
+ IOUtilFunctions.getFileSystem(childPath).open(childPath))))
+ {
+ JSONObject childObj = JSONHelper.parse(br);
+ for( Object obj : childObj.entrySet() ){
+ @SuppressWarnings("unchecked") Map.Entry<Object,Object> e = (Map.Entry<Object, Object>) obj;
+ Object key = e.getKey();
+ Object val = e.getValue();
+ retVal.put(key, val);
+ }
+ }
+ catch( IOException e){
+ raiseValidateError("for MTD file in directory, error parting part of MTD file with path " + childPath.toString() + ": " + e.getMessage(), conditional);
+ }
+ }
+ }
+
+ // CASE: filename points to a file
+ else if (exists) {
+ Path path = new Path(filename);
+ try (BufferedReader br = new BufferedReader(new InputStreamReader(
+ IOUtilFunctions.getFileSystem(path).open(path))))
+ {
+ retVal = new JSONObject(br);
+ }
+ catch (Exception e){
+ raiseValidateError("error parsing MTD file with path " + filename + ": " + e.getMessage(), conditional);
+ }
+ }
+
+ return retVal;
+ }
+
+ private void parseMetaDataParams()
+ {
+ for( Object obj : _metaObj.entrySet() ){
+ Map.Entry<Object,Object> e = (Map.Entry<Object, Object>) obj;
+ Object key = e.getKey();
+ Object val = e.getValue();
+
+ boolean isValidName = DataExpression.READ_VALID_MTD_PARAM_NAMES.contains(key);
+
+ if (!isValidName){ //wrong parameters always rejected
+ raiseValidateError("MTD file " + " contains invalid parameter name: " + key, false);
+ }
+
+ parseMetaDataParam(key, val);
+ }
+ if(_format == null)
+ setFormatTypeString(null);
+ }
+
+ private void parseMetaDataParam(Object key, Object val)
+ {
+ switch(key.toString()) {
+ case DataExpression.READROWPARAM: _dim1 = (Integer) val; break;
+ case DataExpression.READCOLPARAM: _dim2 = (Integer) val; break;
+ case DataExpression.ROWBLOCKCOUNTPARAM: setBlocksize((Integer) val); break;
+ case DataExpression.READNNZPARAM: setNnz((Integer) val); break;
+ case DataExpression.FORMAT_TYPE: setFormatTypeString((String) val); break;
+ case DataExpression.DATATYPEPARAM: setDataType(Types.DataType.valueOf(((String) val).toUpperCase())); break;
+ case DataExpression.VALUETYPEPARAM: setValueType(Types.ValueType.fromExternalString((String) val)); break;
+ case DataExpression.PRIVACY: setPrivacy(PrivacyConstraint.PrivacyLevel.valueOf((String) val)); break;
+ case DataExpression.FINE_GRAINED_PRIVACY: setFineGrainedPrivacy(val.toString()); break;
+ case DataExpression.DELIM_DELIMITER: setDelim(val.toString()); break;
+ case DataExpression.SCHEMAPARAM: setSchema(val.toString()); break;
+ case DataExpression.DELIM_HAS_HEADER_ROW: setHasHeader(true);
+ case DataExpression.DELIM_SPARSE: setSparseDelim((boolean) val);
+ }
+ }
+
+ public boolean mtdExists() { return _metaObj != null && !_metaObj.isEmpty(); }
+
+ public CacheableData<?> parseAndSetPrivacyConstraint(CacheableData<?> cd) throws JSONException {
+ return (CacheableData<?>) PrivacyPropagator.parseAndSetPrivacyConstraint(cd, _metaObj);
+ }
+
+ public String getFormatTypeString() { return _formatTypeString; }
+
+ public String getFineGrainedPrivacy() { return _fineGrainedPrivacy; }
+
+ public String getDelim() { return _delim; }
+
+ public String getSchema() { return _schema; }
+
+ public boolean getHasHeader() { return _hasHeader; }
+
+ public boolean getSparseDelim() { return _sparseDelim; }
+
+ public void setSparseDelim(boolean sparseDelim) { _sparseDelim = sparseDelim; }
+
+ public void setHasHeader(boolean hasHeader) { _hasHeader = hasHeader; }
+
+ public void setFineGrainedPrivacy(String fineGrainedPrivacy) { _fineGrainedPrivacy = fineGrainedPrivacy; }
+
+ public void setSchema(String schema) { _schema = schema; }
+
+ public void setDelim(String delim) { _delim = delim; }
+
+ public void setFormatTypeString(String format) {
+ _formatTypeString = _formatTypeString != null && format == null && _metaObj != null ? (String)JSONHelper.get(_metaObj, DataExpression.FORMAT_TYPE) : format ;
+ if(_formatTypeString != null && EnumUtils.isValidEnum(Types.FileFormat.class, _formatTypeString.toUpperCase()))
+ setFileFormat(Types.FileFormat.safeValueOf(_formatTypeString));
+ }
+
+ @SuppressWarnings("unchecked")
+ public HashMap<String, Expression> parseMetaDataFileParameters(String mtdFileName, boolean conditional, HashMap<String, Expression> varParams)
+ {
+ for( Object obj : _metaObj.entrySet() ){
+ Map.Entry<Object,Object> e = (Map.Entry<Object, Object>) obj;
+ Object key = e.getKey();
+ Object val = e.getValue();
+
+ boolean isValidName = DataExpression.READ_VALID_MTD_PARAM_NAMES.contains(key);
+
+ if (!isValidName){ //wrong parameters always rejected
+ raiseValidateError("MTD file " + mtdFileName + " contains invalid parameter name: " + key, false);
+ }
+
+ parseMetaDataParam(key, val);
+
+ // if the read method parameter is a constant, then verify value matches MTD metadata file
+ if (varParams.get(key.toString()) != null && (varParams.get(key.toString()) instanceof ConstIdentifier)
+ && !varParams.get(key.toString()).toString().equalsIgnoreCase(val.toString())) {
+ raiseValidateError("Parameter '" + key.toString()
+ + "' has conflicting values in metadata and read statement. MTD file value: '"
+ + val.toString() + "'. Read statement value: '" + varParams.get(key.toString()) + "'.", conditional);
+ } else {
+ // if the read method does not specify parameter value, then add MTD metadata file value to parameter list
+ if (varParams.get(key.toString()) == null){
+ if (( !key.toString().equalsIgnoreCase(DataExpression.DESCRIPTIONPARAM) ) &&
+ ( !key.toString().equalsIgnoreCase(DataExpression.AUTHORPARAM) ) &&
+ ( !key.toString().equalsIgnoreCase(DataExpression.CREATEDPARAM) ) )
+ {
+ StringIdentifier strId = new StringIdentifier(val.toString(), this);
+
+ if ( key.toString().equalsIgnoreCase(DataExpression.DELIM_HAS_HEADER_ROW)
+ || key.toString().equalsIgnoreCase(DataExpression.DELIM_FILL)
+ || key.toString().equalsIgnoreCase(DataExpression.DELIM_SPARSE)
+ ) {
+ // parse these parameters as boolean values
+ BooleanIdentifier boolId = null;
+ if (strId.toString().equalsIgnoreCase("true")) {
+ boolId = new BooleanIdentifier(true, this);
+ } else if (strId.toString().equalsIgnoreCase("false")) {
+ boolId = new BooleanIdentifier(false, this);
+ } else {
+ raiseValidateError("Invalid value provided for '" + DataExpression.DELIM_HAS_HEADER_ROW + "' in metadata file '" + mtdFileName + "'. " + "Must be either TRUE or FALSE.", conditional);
+ }
+ varParams.remove(key.toString());
+ addVarParam(key.toString(), boolId, varParams);
+
+ switch(key.toString().toUpperCase()) {
+ case DataExpression.DELIM_HAS_HEADER_ROW: ;
+ case DataExpression.DELIM_FILL: ;
+ case DataExpression.DELIM_SPARSE: ;
+ }
+
+ }
+ else if ( key.toString().equalsIgnoreCase(DataExpression.DELIM_FILL_VALUE)) {
+ // parse these parameters as numeric values
+ DoubleIdentifier doubleId = new DoubleIdentifier(Double.parseDouble(strId.toString()), this);
+ varParams.remove(key.toString());
+ addVarParam(key.toString(), doubleId, varParams);
+ }
+ else if (key.toString().equalsIgnoreCase(DataExpression.DELIM_NA_STRINGS)
+ || key.toString().equalsIgnoreCase(DataExpression.PRIVACY)
+ || key.toString().equalsIgnoreCase(DataExpression.FINE_GRAINED_PRIVACY)) {
+ String naStrings = null;
+ if ( val instanceof String) {
+ naStrings = val.toString();
+ }
+ else if (val instanceof JSONArray) {
+ StringBuilder sb = new StringBuilder();
+ JSONArray valarr = (JSONArray)val;
+ for(int naid=0; naid < valarr.size(); naid++ ) {
+ sb.append( (String) valarr.get(naid) );
+ if ( naid < valarr.size()-1)
+ sb.append( DataExpression.DELIM_NA_STRING_SEP );
+ }
+ naStrings = sb.toString();
+ }
+ else if ( val instanceof JSONObject ){
+ JSONObject valJsonObject = (JSONObject)val;
+ naStrings = valJsonObject.toString();
+ }
+ else {
+ throw new ParseException("Type of value " + val + " from metadata not recognized by parser.");
+ }
+ StringIdentifier sid = new StringIdentifier(naStrings, this);
+ varParams.remove(key.toString());
+ addVarParam(key.toString(), sid, varParams);
+ }
+ else {
+ // by default, treat a parameter as a string
+ addVarParam(key.toString(), strId, varParams);
+ }
+ }
+ }
+ }
+
+ if(_format == null)
+ setFormatTypeString(null);
+ }
+ return varParams;
+ }
+
+ public void addVarParam(String name, Expression value, HashMap<String, Expression> varParams) {
+ if (DMLScript.VALIDATOR_IGNORE_ISSUES && (value == null)) {
+ return;
+ }
+ varParams.put(name, value);
+
+ // if required, initialize values
+ setFilename(value.getFilename());
+ if (getBeginLine() == 0) setBeginLine(value.getBeginLine());
+ if (getBeginColumn() == 0) setBeginColumn(value.getBeginColumn());
+ if (getEndLine() == 0) setEndLine(value.getEndLine());
+ if (getEndColumn() == 0) setEndColumn(value.getEndColumn());
+ if (getText() == null) setText(value.getText());
+ }
+
+ public static String checkHasDelimitedFormat(String filename, boolean conditional) {
+ // if the MTD file exists, check the format is not binary
+ MetaDataAll mtdObject = new MetaDataAll(filename + ".mtd", conditional, false);
+ if (mtdObject.mtdExists()) {
+ try {
+ mtdObject.setFormatTypeString((String) mtdObject._metaObj.get(DataExpression.FORMAT_TYPE));
+ if(Types.FileFormat.isDelimitedFormat(mtdObject.getFormatTypeString()))
+ return mtdObject.getFormatTypeString();
+ }
+ catch(JSONException e) {
+ e.printStackTrace();
+ }
+ }
+ return null;
+ }
+
+ public static boolean checkHasMatrixMarketFormat(String inputFileName, String mtdFileName, boolean conditional)
+ {
+ // Check the MTD file exists. if there is an MTD file, return false.
+ MetaDataAll mtdObject = new MetaDataAll(mtdFileName, conditional, false);
+ if (mtdObject.mtdExists())
+ return false;
+
+ if( HDFSTool.existsFileOnHDFS(inputFileName)
+ && !HDFSTool.isDirectory(inputFileName) )
+ {
+ Path path = new Path(inputFileName);
+ try( BufferedReader in = new BufferedReader(new InputStreamReader(
+ IOUtilFunctions.getFileSystem(path).open(path))))
+ {
+ String headerLine = new String("");
+ if (in.ready())
+ headerLine = in.readLine();
+ return (headerLine !=null && headerLine.startsWith("%%"));
+ }
+ catch(Exception ex) {
+ throw new LanguageException("Failed to read matrix market header.", ex);
+ }
+ }
+ return false;
+ }
+}
diff --git a/src/test/java/org/apache/sysds/test/AutomatedTestBase.java b/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
index d90d450..5929fc6 100644
--- a/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
+++ b/src/test/java/org/apache/sysds/test/AutomatedTestBase.java
@@ -76,6 +76,7 @@ import org.apache.sysds.runtime.matrix.data.FrameBlock;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.runtime.meta.MetaDataAll;
import org.apache.sysds.runtime.meta.MetaDataFormat;
import org.apache.sysds.runtime.privacy.CheckedConstraintsLog;
import org.apache.sysds.runtime.privacy.PrivacyConstraint;
@@ -906,9 +907,9 @@ public abstract class AutomatedTestBase {
public static MatrixCharacteristics readDMLMetaDataFile(String fileName) {
try {
- JSONObject meta = getMetaDataJSON(fileName);
- long rlen = Long.parseLong(meta.get(DataExpression.READROWPARAM).toString());
- long clen = Long.parseLong(meta.get(DataExpression.READCOLPARAM).toString());
+ MetaDataAll metaDataAll = getMetaData(fileName);
+ long rlen = metaDataAll.getDim1();
+ long clen = metaDataAll.getDim2();
return new MatrixCharacteristics(rlen, clen, -1, -1);
}
catch(Exception ex) {
@@ -916,13 +917,13 @@ public abstract class AutomatedTestBase {
}
}
- public static JSONObject getMetaDataJSON(String fileName) {
- return getMetaDataJSON(fileName, OUTPUT_DIR);
+ public static MetaDataAll getMetaData(String fileName) {
+ return getMetaData(fileName, OUTPUT_DIR);
}
- public static JSONObject getMetaDataJSON(String fileName, String outputDir) {
+ public static MetaDataAll getMetaData(String fileName, String outputDir) {
String fname = baseDirectory + outputDir + fileName + ".mtd";
- return new DataExpression().readMetadataFile(fname, false);
+ return new MetaDataAll(fname, false, true);
}
/**
@@ -934,17 +935,16 @@ public abstract class AutomatedTestBase {
public static PrivacyConstraint getPrivacyConstraintFromMetaData(String fileName, String dir) throws DMLRuntimeException {
PrivacyConstraint outputPrivacyConstraint = new PrivacyConstraint();
try {
- JSONObject metadata = getMetaDataJSON(fileName, dir);
- if ( metadata != null ){
- if ( metadata.containsKey(DataExpression.PRIVACY) ){
- PrivacyLevel readPrivacyLevel = PrivacyLevel.valueOf(metadata.get(DataExpression.PRIVACY).toString());
- outputPrivacyConstraint.setPrivacyLevel(readPrivacyLevel);
+ MetaDataAll metadata = getMetaData(fileName, dir);
+ Object metaValue;
+ if ( metadata.mtdExists() ){
+ if ( (metaValue = metadata.getPrivacy()) != null){
+ outputPrivacyConstraint.setPrivacyLevel(((PrivacyConstraint) metaValue).getPrivacyLevel());
} else {
outputPrivacyConstraint.setPrivacyLevel(PrivacyLevel.None);
}
- if ( metadata.containsKey(DataExpression.FINE_GRAINED_PRIVACY)){
- JSONObject fineGrainedJSON = (JSONObject) metadata.get(DataExpression.FINE_GRAINED_PRIVACY);
- PrivacyUtils.putFineGrainedConstraintsFromString(outputPrivacyConstraint.getFineGrainedPrivacy(), fineGrainedJSON.toString());
+ if ((metaValue = metadata.getFineGrainedPrivacy()) != null ){
+ PrivacyUtils.putFineGrainedConstraintsFromString(outputPrivacyConstraint.getFineGrainedPrivacy(), (String) metaValue);
}
}
} catch (JSONException e){
@@ -957,9 +957,9 @@ public abstract class AutomatedTestBase {
return getPrivacyConstraintFromMetaData(fileName, OUTPUT_DIR);
}
- public static String readDMLMetaDataValue(String fileName, String outputDir, String key) throws JSONException {
- JSONObject meta = getMetaDataJSON(fileName, outputDir);
- return meta.get(key).toString();
+ public static String readDMLMetaDataPrivacyValue(String fileName, String outputDir, String key) {
+ MetaDataAll meta = getMetaData(fileName, outputDir);
+ return key.equals(DataExpression.FINE_GRAINED_PRIVACY) ? meta.getFineGrainedPrivacy() : meta.getPrivacy().getPrivacyLevel().name();
}
/**
@@ -970,11 +970,11 @@ public abstract class AutomatedTestBase {
* @param key key to find in metadata
* @return value retrieved from metadata for the given key
*/
- public static String readDMLMetaDataValueCatchException(String fileName, String outputDir, String key) {
+ public static String readDMLMetaDataPrivacyValueCatchException(String fileName, String outputDir, String key) {
try {
- return readDMLMetaDataValue(fileName, outputDir, key);
+ return readDMLMetaDataPrivacyValue(fileName, outputDir, key);
}
- catch(JSONException | NullPointerException e) {
+ catch(NullPointerException e) {
fail("Privacy constraint not written to output metadata
file:\n" + e);
return null;
}
@@ -982,8 +982,8 @@ public abstract class AutomatedTestBase {
public static ValueType readDMLMetaDataValueType(String fileName) {
try {
- JSONObject meta = getMetaDataJSON(fileName);
- return ValueType.fromExternalString(meta.get(DataExpression.VALUETYPEPARAM).toString());
+ MetaDataAll meta = getMetaData(fileName);
+ return meta.getValueType();
}
catch(Exception ex) {
throw new RuntimeException(ex);
diff --git a/src/test/java/org/apache/sysds/test/functions/privacy/MatrixMultiplicationPropagationTest.java b/src/test/java/org/apache/sysds/test/functions/privacy/MatrixMultiplicationPropagationTest.java
index 9e5110d..3686a22 100644
--- a/src/test/java/org/apache/sysds/test/functions/privacy/MatrixMultiplicationPropagationTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/privacy/MatrixMultiplicationPropagationTest.java
@@ -110,7 +110,7 @@ public class MatrixMultiplicationPropagationTest extends AutomatedTestBase {
// Check that the output metadata is correct
if ( privacyLevel != PrivacyLevel.None ){
- String actualPrivacyValue = readDMLMetaDataValue("c", OUTPUT_DIR, DataExpression.PRIVACY);
+ String actualPrivacyValue = readDMLMetaDataPrivacyValue("c", OUTPUT_DIR, DataExpression.PRIVACY);
assertEquals(String.valueOf(privacyLevel), actualPrivacyValue);
} else exceptionExpected("c", OUTPUT_DIR);
}
@@ -153,16 +153,14 @@ public class MatrixMultiplicationPropagationTest extends AutomatedTestBase {
* @param dir directory of variable
*/
private static void exceptionExpected(String variable, String dir){
- boolean JSONExceptionThrown = false;
+ String actualPrivacyValue = null;
try{
- readDMLMetaDataValue(variable, dir, DataExpression.PRIVACY);
- } catch (JSONException e){
- JSONExceptionThrown = true;
+ actualPrivacyValue = readDMLMetaDataPrivacyValue(variable, dir, DataExpression.PRIVACY);
} catch (Exception e){
fail("Exception occured, but JSONException was expected. The exception thrown is: " + e.getMessage());
e.printStackTrace();
}
- assert(JSONExceptionThrown);
+ assert((PrivacyLevel.None.name().equals(actualPrivacyValue)));
}
@Test
@@ -192,7 +190,7 @@ public class MatrixMultiplicationPropagationTest extends AutomatedTestBase {
writeInputMatrixWithMTD("a", a, false, dataCharacteristics, privacyConstraint);
if ( privacyLevel != PrivacyLevel.None ){
- String actualPrivacyValue = readDMLMetaDataValue("a", INPUT_DIR, DataExpression.PRIVACY);
+ String actualPrivacyValue = readDMLMetaDataPrivacyValue("a", INPUT_DIR, DataExpression.PRIVACY);
assertEquals(String.valueOf(privacyLevel), actualPrivacyValue);
} else exceptionExpected("a", INPUT_DIR);
diff --git a/src/test/java/org/apache/sysds/test/functions/privacy/MatrixRuntimePropagationTest.java b/src/test/java/org/apache/sysds/test/functions/privacy/MatrixRuntimePropagationTest.java
index a72ea32..df2b643 100644
--- a/src/test/java/org/apache/sysds/test/functions/privacy/MatrixRuntimePropagationTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/privacy/MatrixRuntimePropagationTest.java
@@ -95,11 +95,11 @@ public class MatrixRuntimePropagationTest extends AutomatedTestBase
// Check that the output metadata is correct
if ( privacyLevel.equals(PrivacyLevel.Private) ) {
- String actualPrivacyValue = readDMLMetaDataValue("c", OUTPUT_DIR, DataExpression.PRIVACY);
+ String actualPrivacyValue = readDMLMetaDataPrivacyValue("c", OUTPUT_DIR, DataExpression.PRIVACY);
assertEquals(PrivacyLevel.Private.name(), actualPrivacyValue);
}
else if ( privacyLevel.equals(PrivacyLevel.PrivateAggregation) ){
- String actualPrivacyValue = readDMLMetaDataValue("c", OUTPUT_DIR, DataExpression.PRIVACY);
+ String actualPrivacyValue = readDMLMetaDataPrivacyValue("c", OUTPUT_DIR, DataExpression.PRIVACY);
assertEquals(PrivacyLevel.PrivateAggregation.name(), actualPrivacyValue);
}
else {
@@ -107,17 +107,14 @@ public class MatrixRuntimePropagationTest extends AutomatedTestBase
// or that privacy level is set to none
// because no privacy metadata should be written to c
// except if the privacy written is set to private
- boolean JSONExceptionThrown = false;
String actualPrivacyValue = null;
try{
- actualPrivacyValue = readDMLMetaDataValue("c", OUTPUT_DIR, DataExpression.PRIVACY);
- } catch (JSONException e){
- JSONExceptionThrown = true;
+ actualPrivacyValue = readDMLMetaDataPrivacyValue("c", OUTPUT_DIR, DataExpression.PRIVACY);
} catch (Exception e){
fail("Exception occured, but JSONException was expected. The exception thrown is: " + e.getMessage());
e.printStackTrace();
}
- assert(JSONExceptionThrown || (PrivacyLevel.None.name().equals(actualPrivacyValue)));
+ assert((PrivacyLevel.None.name().equals(actualPrivacyValue)));
}
}
}
diff --git a/src/test/java/org/apache/sysds/test/functions/privacy/PrivacyLineageTest.java b/src/test/java/org/apache/sysds/test/functions/privacy/PrivacyLineageTest.java
index d284e8b..d9bc591 100644
--- a/src/test/java/org/apache/sysds/test/functions/privacy/PrivacyLineageTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/privacy/PrivacyLineageTest.java
@@ -78,7 +78,7 @@ public class PrivacyLineageTest extends AutomatedTestBase {
}
private static void finegrainedAssertions(){
- String outputFineGrained = readDMLMetaDataValueCatchException("C", OUTPUT_DIR, DataExpression.FINE_GRAINED_PRIVACY);
+ String outputFineGrained = readDMLMetaDataPrivacyValue("C", OUTPUT_DIR, DataExpression.FINE_GRAINED_PRIVACY);
Assert.assertEquals(
"{\"Private\":[[[0,0],[0,19]],[[1,0],[1,19]],[[2,0],[2,19]],[[3,0],[3,19]],[[4,0],[4,19]]],\"PrivateAggregation\":[]}",
outputFineGrained);
diff --git a/src/test/java/org/apache/sysds/test/functions/privacy/ReadWriteTest.java b/src/test/java/org/apache/sysds/test/functions/privacy/ReadWriteTest.java
index dea5773..850032f 100644
--- a/src/test/java/org/apache/sysds/test/functions/privacy/ReadWriteTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/privacy/ReadWriteTest.java
@@ -20,6 +20,7 @@
package org.apache.sysds.test.functions.privacy;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.FileInputStream;
@@ -28,7 +29,9 @@ import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
+import org.apache.sysds.parser.DataExpression;
import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.runtime.meta.MetaDataAll;
import org.apache.sysds.runtime.privacy.PrivacyConstraint;
import org.apache.sysds.runtime.privacy.PrivacyConstraint.PrivacyLevel;
import org.apache.sysds.runtime.privacy.finegrained.DataRange;
@@ -66,8 +69,8 @@ public class ReadWriteTest extends AutomatedTestBase {
writeA();
- JSONObject metadata = getMetaDataJSON("a", "in/");
- assertTrue(metadata.containsKey("fine_grained_privacy"));
+ MetaDataAll metadata = getMetaData("a", "in/");
+ assertNotNull(metadata.getFineGrainedPrivacy());
}
@Test
@@ -85,8 +88,8 @@ public class ReadWriteTest extends AutomatedTestBase {
runTest(true,false,null,-1);
compareResults(1e-9);
- JSONObject metadata = getMetaDataJSON("b");
- assertTrue(metadata.containsKey("fine_grained_privacy"));
+ MetaDataAll metadata = getMetaData("b");
+ assertNotNull(metadata.getFineGrainedPrivacy());
}
@Test
@@ -96,8 +99,8 @@ public class ReadWriteTest extends AutomatedTestBase {
writeA();
- JSONObject metadata = getMetaDataJSON("a", "in/");
- assertTrue(metadata.containsKey("fine_grained_privacy"));
+ MetaDataAll metadata = getMetaData("a", "in/");
+ assertNotNull(metadata.getFineGrainedPrivacy());
PrivacyConstraint expectedPC = new PrivacyConstraint();
setFineGrained(expectedPC);
@@ -112,8 +115,8 @@ public class ReadWriteTest extends AutomatedTestBase {
writeA();
- JSONObject metadata = getMetaDataJSON("a", "in/");
- assertTrue(metadata.containsKey("fine_grained_privacy"));
+ MetaDataAll metadata = getMetaData("a", "in/");
+ assertNotNull(metadata.getFineGrainedPrivacy());
PrivacyConstraint expectedPC = new PrivacyConstraint();
setFineGrained(expectedPC);
diff --git a/src/test/java/org/apache/sysds/test/functions/privacy/ScalarPropagationTest.java b/src/test/java/org/apache/sysds/test/functions/privacy/ScalarPropagationTest.java
index d2e27b3..a48c6d8 100644
--- a/src/test/java/org/apache/sysds/test/functions/privacy/ScalarPropagationTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/privacy/ScalarPropagationTest.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.assertFalse;
import java.util.HashMap;
+import org.apache.sysds.runtime.meta.MetaDataAll;
import org.junit.Test;
import org.apache.sysds.parser.DataExpression;
import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
@@ -73,7 +74,7 @@ public class ScalarPropagationTest extends AutomatedTestBase
assertEquals("Values mismatch: DMLvalue " + dmlvalue + " !=
ExpectedValue " + roundScalar,
roundScalar, dmlvalue, 0.001);
- String actualPrivacyValue = readDMLMetaDataValueCatchException("scalar", "out/", DataExpression.PRIVACY);
+ String actualPrivacyValue = readDMLMetaDataPrivacyValueCatchException("scalar", "out/", DataExpression.PRIVACY);
assertEquals(String.valueOf(PrivacyLevel.Private), actualPrivacyValue);
}
@@ -131,11 +132,11 @@ public class ScalarPropagationTest extends AutomatedTestBase
expectedScalar, actualScalar, 0.001);
if ( expectedPrivacyLevel != PrivacyLevel.None ){
- String actualPrivacyValue = readDMLMetaDataValueCatchException("scalar", "out/", DataExpression.PRIVACY);
+ String actualPrivacyValue = readDMLMetaDataPrivacyValueCatchException("scalar", "out/", DataExpression.PRIVACY);
assertEquals(String.valueOf(expectedPrivacyLevel), actualPrivacyValue);
} else {
- JSONObject meta = getMetaDataJSON("scalar", "out/");
- assertFalse( "Metadata found for output scalar with
privacy constraint set, but input privacy level is none", meta != null &&
meta.has(DataExpression.PRIVACY) );
+ MetaDataAll meta = getMetaData("scalar", "out/");
+ assertFalse( "Metadata found for output scalar with
privacy constraint set, but input privacy level is none", meta.mtdExists() &&
meta.getPrivacy().getPrivacyLevel() != PrivacyLevel.None );
}
}