This is an automated email from the ASF dual-hosted git repository.
ssiddiqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 233f30d [MINOR] Merging crossv.dml and frameRemoveEmpty.dml into utils.dml
233f30d is described below
commit 233f30d778249ea3b83010bb902da964d1955d43
Author: Shafaq Siddiqi <[email protected]>
AuthorDate: Tue Apr 27 22:17:04 2021 +0200
[MINOR] Merging crossv.dml and frameRemoveEmpty.dml into utils.dml
---
scripts/pipelines/scripts/crossV.dml | 103 ---------------------
scripts/pipelines/scripts/frameRemoveEmpty.dml | 36 -------
.../sysds/runtime/matrix/data/FrameBlock.java | 2 +-
.../test/functions/binary/frame/FrameMapTest.java | 4 +-
.../functions/pipelines/testClassification.dml | 4 +-
5 files changed, 5 insertions(+), 144 deletions(-)
diff --git a/scripts/pipelines/scripts/crossV.dml b/scripts/pipelines/scripts/crossV.dml
deleted file mode 100644
index 4a2a432..0000000
--- a/scripts/pipelines/scripts/crossV.dml
+++ /dev/null
@@ -1,103 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-classifyDirty = function(Matrix[Double] Xtrain, Matrix[Double] ytrain, Matrix[Double] opt,
- Matrix[Double] mask, Boolean isWeighted = TRUE, Integer cv)
- return (Double accuracy)
-{
- # classify without cleaning fill with edfault values 1
- Xtrain = replace(target = Xtrain, pattern = NaN, replacement=1)
-
- dX_train = dummycoding(Xtrain, mask)
-
- accuracy = crossV(Xtrain, ytrain, cv, mask, opt, isWeighted)
- accuracy = mean(accuracy)
-
- # # learn model
- # B = multiLogReg(X=dX_train, Y=ytrain, icpt=2, reg=as.scalar(opt[1,1]), maxi = as.scalar(opt[1,2]), maxii= 0, verbose=FALSE);
- # [M,pred,accuracy] = multiLogRegPredict(X=dX_test, B=B, Y=ytest, verbose=FALSE);
-
- # if(isWeighted)
- # accuracy = getAccuracy(y=ytest, yhat=pred, isWeighted=isWeighted)
- print("cross validated dirty accuracy "+accuracy)
-}
-
-
-crossV = function(Matrix[double] X, Matrix[double] y, Integer k, Matrix[Double] mask,
- Matrix[Double] MLhp, Boolean isWeighted)
-return (Matrix[Double] accuracyMatrix)
-{
-
- accuracyMatrix = matrix(0, k, 1)
-
- dataList = list()
- testL = list()
- data = order(target = cbind(y, X), by = 1, decreasing=FALSE, index.return=FALSE)
- classes = table(data[, 1], 1)
- ins_per_fold = classes/k
- start_fold = matrix(1, rows=nrow(ins_per_fold), cols=1)
- fold_idxes = cbind(start_fold, ins_per_fold)
-
- start_i = 0; end_i = 0; idx_fold = 1;;
- for(i in 1:k)
- {
- fold_i = matrix(0, 0, ncol(data))
- start=0; end=0;
- for(j in 1:nrow(classes))
- {
- idx = as.scalar(classes[j, 1])
- start = end + 1;
- end = end + idx
- class_j = data[start:end, ]
-
-
- start_i = as.scalar(fold_idxes[j, 1]);
- end_i = as.scalar(fold_idxes[j, 2])
-
- fold_i = rbind(fold_i, class_j[start_i:end_i, ])
- }
-
- dataList = append(dataList, fold_i)
- fold_idxes[, 1] = fold_idxes[, 2] + 1
- fold_idxes[, 2] += ins_per_fold
- while(FALSE){}
- }
-
- for(i in seq(1,k))
- {
- [trainList, hold_out] = remove(dataList, i)
- trainset = rbind(trainList)
- testset = as.matrix(hold_out)
- trainX = trainset[, 2:ncol(trainset)]
- trainy = trainset[, 1]
- testX = testset[, 2:ncol(testset)]
- testy = testset[, 1]
- beta = multiLogReg(X=trainX, Y=trainy, icpt=1, reg=as.scalar(MLhp[1,1]), tol= 1e-9,
- maxi=as.scalar(MLhp[1,2]), maxii= 50, verbose=FALSE);
- [prob, yhat, a] = multiLogRegPredict(testX, beta, testy, FALSE)
- accuracy = getAccuracy(testy, yhat, isWeighted)
- accuracyMatrix[i] = accuracy
- }
-
-}
-
-
-
diff --git a/scripts/pipelines/scripts/frameRemoveEmpty.dml b/scripts/pipelines/scripts/frameRemoveEmpty.dml
deleted file mode 100644
index 71be5a8..0000000
--- a/scripts/pipelines/scripts/frameRemoveEmpty.dml
+++ /dev/null
@@ -1,36 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-
-# remove empty wrapper for frames
-frameRemoveEmpty = function(Frame[Unknown] target, String margin, Matrix[Double] select)
-return (Frame[Unknown] frameblock)
-{
- idx = seq(1, ncol(target))
- # get the indexes of columns for recode transformation
- index = vectorToCsv(idx)
- # recode logical pipelines for easy handling
- jspecR = "{ids:true, recode:["+index+"]}";
- [X, M] = transformencode(target=target, spec=jspecR);
- X = removeEmpty(target = X, margin = margin, select = select)
- frameblock = transformdecode(target = X, spec = jspecR, meta = M)
-}
\ No newline at end of file
diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
index d4674a2..b280903 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/FrameBlock.java
@@ -93,7 +93,7 @@ public class FrameBlock implements CacheBlock, Externalizable
{
private Array[] _coldata = null;
/** Cached size in memory to avoid repeated scans of string columns */
- long _msize = -1;
+ long _msize = -1;
public FrameBlock() {
_numRows = 0;
diff --git a/src/test/java/org/apache/sysds/test/functions/binary/frame/FrameMapTest.java b/src/test/java/org/apache/sysds/test/functions/binary/frame/FrameMapTest.java
index e72e5e2..9fef795 100644
--- a/src/test/java/org/apache/sysds/test/functions/binary/frame/FrameMapTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/binary/frame/FrameMapTest.java
@@ -139,8 +139,8 @@ public class FrameMapTest extends AutomatedTestBase {
else if(type == TestType.SHERLOCK_PREP) {
String[][] data = new String[1][1];
data[0][0] = "\"['Global', 'United States', 'Australia']\"";
- FileFormatPropertiesCSV ffp = new FileFormatPropertiesCSV();
- ffp.setDelim(";");
+ FileFormatPropertiesCSV ffp = new FileFormatPropertiesCSV();
+ ffp.setDelim(";");
FrameWriterFactory.createFrameWriter(FileFormat.CSV, ffp).
writeFrameToHDFS(new FrameBlock(schemaStrings1, data), input("A"), 1, 1);
}
diff --git a/src/test/scripts/functions/pipelines/testClassification.dml b/src/test/scripts/functions/pipelines/testClassification.dml
index 93f90ed..1a33bf2 100644
--- a/src/test/scripts/functions/pipelines/testClassification.dml
+++ b/src/test/scripts/functions/pipelines/testClassification.dml
@@ -59,7 +59,7 @@ X = dropInvalidType(F, getSchema)
if(sum(getMask) > 0)
{
# always recode the label
- index = utils::vectorToCsv(getMask)
+ index = vectorToCsv(getMask)
jspecR = "{ids:true, recode:["+index+"]}"
[eX, X_meta] = transformencode(target=X, spec=jspecR);
# change the schema to reflect the encoded values
@@ -88,7 +88,7 @@ allLgs = logical::transformLogical(lgSeed)
d_accuracy = 0
# 4. perform the sampling
-[eX, eY] = utils::doSample(eX, eY, sample)
+[eX, eY] = doSample(eX, eY, sample)
# 5. get train test and validation set with balanced class distribution
# [X_train, y_train, X_test, y_test] = splitBalanced(X=eX, Y=eY, splitRatio=0.7, verbose=FALSE)