Repository: incubator-systemml
Updated Branches:
refs/heads/master 169a2da5f -> 4c162afd9
[SYSTEMML-1431] Throw controlled error when one-dimensional numpy array is
passed to SystemML
Here is an example pyspark session demonstrating this PR:
>>> from mlxtend.data import mnist_data
>>> import numpy as np
>>> from sklearn.utils import shuffle
X, y = mnist_data()
from systemml import MLContext, dml
ml = MLContext(sc)
script = dml('print(sum(X))').input(X=X)
ml.execute(script)
script = dml('print(sum(X))').input(X=y)
ml.execute(script)
script = dml('print(sum(X))').input(X=y.reshape(-1, 1))
ml.execute(script)
>>> X, y = mnist_data()
>>> from systemml import MLContext, dml
>>> ml = MLContext(sc)
Welcome to Apache SystemML!
>>> script = dml('print(sum(X))').input(X=X)
>>> ml.execute(script)
1.31267102E8
MLResults
>>> script = dml('print(sum(X))').input(X=y)
>>> ml.execute(script)
...
TypeError: Expected 2-dimensional ndarray, instead passed 1-dimensional
ndarray. Hint: If you intend to pass the 1-dimensional ndarray as a
column-vector, please reshape it: input_ndarray.reshape(-1, 1)
>>> script = dml('print(sum(X))').input(X=y.reshape(-1, 1))
>>> ml.execute(script)
22500.0
Closes #438.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit:
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/4c162afd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/4c162afd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/4c162afd
Branch: refs/heads/master
Commit: 4c162afd93932b0fbf74d76113308ba3b5328878
Parents: 169a2da
Author: Niketan Pansare <[email protected]>
Authored: Thu Mar 23 11:41:16 2017 -0700
Committer: Niketan Pansare <[email protected]>
Committed: Thu Mar 23 11:44:33 2017 -0700
----------------------------------------------------------------------
docs/beginners-guide-python.md | 2 +-
src/main/python/systemml/converters.py | 10 +++++++++-
src/main/python/systemml/mllearn/estimators.py | 6 +++++-
3 files changed, 15 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4c162afd/docs/beginners-guide-python.md
----------------------------------------------------------------------
diff --git a/docs/beginners-guide-python.md b/docs/beginners-guide-python.md
index ffab09e..24f7151 100644
--- a/docs/beginners-guide-python.md
+++ b/docs/beginners-guide-python.md
@@ -183,7 +183,7 @@ y_train = diabetes.target[:-20]
y_test = diabetes.target[-20:]
# Train Linear Regression model
X = sml.matrix(X_train)
-y = sml.matrix(y_train)
+y = sml.matrix(np.matrix(y_train).T)
A = X.transpose().dot(X)
b = X.transpose().dot(y)
beta = sml.solve(A, b).toNumPy()
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4c162afd/src/main/python/systemml/converters.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/converters.py
b/src/main/python/systemml/converters.py
index ce709f5..202d19a 100644
--- a/src/main/python/systemml/converters.py
+++ b/src/main/python/systemml/converters.py
@@ -82,8 +82,16 @@ def convertToMatrixBlock(sc, src, maxSizeBlockInMB=8):
src = coo_matrix(src, dtype=np.float64)
else:
src = np.asarray(src, dtype=np.float64)
+ if len(src.shape) != 2:
+ hint = ''
+ num_dim = len(src.shape)
+ type1 = str(type(src).__name__)
+ if type(src) == np.ndarray and num_dim == 1:
+ hint = '. Hint: If you intend to pass the 1-dimensional ndarray as
a column-vector, please reshape it: input_ndarray.reshape(-1, 1)'
+ elif num_dim > 2:
+ hint = '. Hint: If you intend to pass a tensor, please reshape it
into (N, CHW) format'
+ raise TypeError('Expected 2-dimensional ' + type1 + ', instead passed
' + str(num_dim) + '-dimensional ' + type1 + hint)
numRowsPerBlock = int(math.ceil((maxSizeBlockInMB*1000000) /
(src.shape[1]*8)))
- # print("numRowsPerBlock=" + str(numRowsPerBlock))
multiBlockTransfer = False if numRowsPerBlock >= src.shape[0] else True
if not multiBlockTransfer:
if isinstance(src, spmatrix):
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/4c162afd/src/main/python/systemml/mllearn/estimators.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mllearn/estimators.py
b/src/main/python/systemml/mllearn/estimators.py
index 4188ade..d6ad069 100644
--- a/src/main/python/systemml/mllearn/estimators.py
+++ b/src/main/python/systemml/mllearn/estimators.py
@@ -81,7 +81,11 @@ class BaseSystemMLEstimator(Estimator):
def _fit_numpy(self):
try:
- self.model = self.estimator.fit(convertToMatrixBlock(self.sc,
self.X), convertToMatrixBlock(self.sc, self.y))
+ if type(self.y) == np.ndarray and len(self.y.shape) == 1:
+ # Since we know that mllearn always needs a column vector
+ self.y = np.matrix(self.y).T
+ y_mb = convertToMatrixBlock(self.sc, self.y)
+ self.model = self.estimator.fit(convertToMatrixBlock(self.sc,
self.X), y_mb)
except Py4JError:
traceback.print_exc()