Repository: incubator-systemml
Updated Branches:
  refs/heads/master 169a2da5f -> 4c162afd9


[SYSTEMML-1431] Throw controlled error when one-dimensional numpy array is 
passed to SystemML

Here is an example pyspark session demonstrating this PR:
>>> from mlxtend.data import mnist_data
>>> import numpy as np
>>> from sklearn.utils import shuffle
X, y = mnist_data()
from systemml import MLContext, dml
ml = MLContext(sc)
script = dml('print(sum(X))').input(X=X)
ml.execute(script)
script = dml('print(sum(X))').input(X=y)
ml.execute(script)
script = dml('print(sum(X))').input(X=y.reshape(-1, 1))
ml.execute(script)
>>> X, y = mnist_data()
>>> from systemml import MLContext, dml
>>> ml = MLContext(sc)

Welcome to Apache SystemML!

>>> script = dml('print(sum(X))').input(X=X)
>>> ml.execute(script)
1.31267102E8
MLResults
>>> script = dml('print(sum(X))').input(X=y)
>>> ml.execute(script)
...
TypeError: Expected 2-dimensional ndarray, instead passed 1-dimensional
ndarray. Hint: If you intend to pass the 1-dimensional ndarray as a
column-vector, please reshape it: input_ndarray.reshape(-1, 1)
>>> script = dml('print(sum(X))').input(X=y.reshape(-1, 1))
>>> ml.execute(script)
22500.0

Closes #438.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4c162afd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4c162afd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4c162afd

Branch: refs/heads/master
Commit: 4c162afd93932b0fbf74d76113308ba3b5328878
Parents: 169a2da
Author: Niketan Pansare <[email protected]>
Authored: Thu Mar 23 11:41:16 2017 -0700
Committer: Niketan Pansare <[email protected]>
Committed: Thu Mar 23 11:44:33 2017 -0700

----------------------------------------------------------------------
 docs/beginners-guide-python.md                 |  2 +-
 src/main/python/systemml/converters.py         | 10 +++++++++-
 src/main/python/systemml/mllearn/estimators.py |  6 +++++-
 3 files changed, 15 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4c162afd/docs/beginners-guide-python.md
----------------------------------------------------------------------
diff --git a/docs/beginners-guide-python.md b/docs/beginners-guide-python.md
index ffab09e..24f7151 100644
--- a/docs/beginners-guide-python.md
+++ b/docs/beginners-guide-python.md
@@ -183,7 +183,7 @@ y_train = diabetes.target[:-20]
 y_test = diabetes.target[-20:]
 # Train Linear Regression model
 X = sml.matrix(X_train)
-y = sml.matrix(y_train)
+y = sml.matrix(np.matrix(y_train).T)
 A = X.transpose().dot(X)
 b = X.transpose().dot(y)
 beta = sml.solve(A, b).toNumPy()

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4c162afd/src/main/python/systemml/converters.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/converters.py 
b/src/main/python/systemml/converters.py
index ce709f5..202d19a 100644
--- a/src/main/python/systemml/converters.py
+++ b/src/main/python/systemml/converters.py
@@ -82,8 +82,16 @@ def convertToMatrixBlock(sc, src, maxSizeBlockInMB=8):
         src = coo_matrix(src,  dtype=np.float64)
     else:
         src = np.asarray(src, dtype=np.float64)
+    if len(src.shape) != 2:
+        hint = ''
+        num_dim = len(src.shape)
+        type1 = str(type(src).__name__)
+        if type(src) == np.ndarray and num_dim == 1:
+            hint = '. Hint: If you intend to pass the 1-dimensional ndarray as 
a column-vector, please reshape it: input_ndarray.reshape(-1, 1)'
+        elif num_dim > 2:
+            hint = '. Hint: If you intend to pass a tensor, please reshape it 
into (N, CHW) format'
+        raise TypeError('Expected 2-dimensional ' + type1 + ', instead passed 
' + str(num_dim) + '-dimensional ' + type1 + hint)
     numRowsPerBlock = int(math.ceil((maxSizeBlockInMB*1000000) / 
(src.shape[1]*8)))
-    # print("numRowsPerBlock=" + str(numRowsPerBlock))
     multiBlockTransfer = False if numRowsPerBlock >= src.shape[0] else True
     if not multiBlockTransfer:
         if isinstance(src, spmatrix):

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4c162afd/src/main/python/systemml/mllearn/estimators.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mllearn/estimators.py 
b/src/main/python/systemml/mllearn/estimators.py
index 4188ade..d6ad069 100644
--- a/src/main/python/systemml/mllearn/estimators.py
+++ b/src/main/python/systemml/mllearn/estimators.py
@@ -81,7 +81,11 @@ class BaseSystemMLEstimator(Estimator):
     
     def _fit_numpy(self):
         try:
-            self.model = self.estimator.fit(convertToMatrixBlock(self.sc, 
self.X), convertToMatrixBlock(self.sc, self.y))
+            if type(self.y) == np.ndarray and len(self.y.shape) == 1:
+                # Since we know that mllearn always needs a column vector
+                self.y = np.matrix(self.y).T
+            y_mb = convertToMatrixBlock(self.sc, self.y)
+            self.model = self.estimator.fit(convertToMatrixBlock(self.sc, 
self.X), y_mb)
         except Py4JError:
             traceback.print_exc()
                     

Reply via email to