Repository: incubator-systemml
Updated Branches:
  refs/heads/master 7610a21db -> 542de374e


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/542de374/src/main/python/systemml/mlcontext.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mlcontext.py 
b/src/main/python/systemml/mlcontext.py
new file mode 100644
index 0000000..1b90e70
--- /dev/null
+++ b/src/main/python/systemml/mlcontext.py
@@ -0,0 +1,302 @@
+#!/usr/bin/python
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+import os
+
+try:
+    import py4j.java_gateway
+    from py4j.java_gateway import JavaObject
+except ImportError:
+    raise ImportError('Unable to import JavaObject from py4j.java_gateway. 
Hint: Make sure you are running with pyspark')
+
+from pyspark import SparkContext
+import pyspark.mllib.common
+from pyspark.sql import DataFrame, SQLContext
+from .converters import *
+
+def dml(scriptString):
+    """
+    Build a Script object for a DML script.
+
+    Parameters
+    ----------
+    scriptString: string
+        Either the text of a dml script or a path to a dml script file.
+
+    Returns
+    -------
+    script: Script instance
+        A Script created with scriptType="dml".
+
+    Raises
+    ------
+    ValueError
+        If scriptString is not an instance of `str`.
+    """
+    if isinstance(scriptString, str):
+        return Script(scriptString, scriptType="dml")
+    raise ValueError("scriptString should be a string, got %s" % type(scriptString))
+
+
+def pydml(scriptString):
+    """
+    Build a Script object for a PyDML script.
+
+    Parameters
+    ----------
+    scriptString: string
+        Either the text of a pydml script or a path to a pydml script file.
+
+    Returns
+    -------
+    script: Script instance
+        A Script created with scriptType="pydml".
+
+    Raises
+    ------
+    ValueError
+        If scriptString is not an instance of `str`.
+    """
+    if isinstance(scriptString, str):
+        return Script(scriptString, scriptType="pydml")
+    raise ValueError("scriptString should be a string, got %s" % type(scriptString))
+
+
+def _java2py(sc, obj):
+    """
+    Convert a Java object to its Python counterpart.
+
+    The conversion is delegated to PySpark's private `_java2py`; if the
+    result is still a JavaObject whose simple class name is 'Matrix', it is
+    wrapped in the Python `Matrix` class.
+    """
+    # TODO: Port this private PySpark function.
+    converted = pyspark.mllib.common._java2py(sc, obj)
+    if not isinstance(converted, JavaObject):
+        return converted
+    if converted.getClass().getSimpleName() == 'Matrix':
+        return Matrix(converted, sc)
+    return converted
+
+
+def _py2java(sc, obj):
+    """
+    Convert a Python object to its Java counterpart.
+
+    A Python `Matrix` wrapper is first replaced by the Java matrix it holds;
+    the value is then handed to PySpark's private `_py2java`.
+    """
+    unwrapped = obj._java_matrix if isinstance(obj, Matrix) else obj
+    # TODO: Port this private PySpark function.
+    return pyspark.mllib.common._py2java(sc, unwrapped)
+
+
+class Matrix(object):
+    """
+    Thin Python handle on a Java Matrix object.
+
+    Parameters
+    ----------
+    javaMatrix: JavaObject
+        A Java Matrix object as returned by calling `ml.execute().get()`.
+
+    sc: SparkContext
+        SparkContext, used when converting the matrix back to Python.
+    """
+    def __init__(self, javaMatrix, sc):
+        self.sc = sc
+        self._java_matrix = javaMatrix
+
+    def __repr__(self):
+        return "Matrix"
+
+    def toDF(self):
+        """
+        Convert the Matrix to a PySpark SQL DataFrame.
+
+        Returns
+        -------
+        df: PySpark SQL DataFrame
+            A PySpark SQL DataFrame representing the matrix, with
+            one "ID" column containing the row index (since Spark
+            DataFrames are unordered), followed by columns of doubles
+            for each column in the matrix.
+        """
+        return _java2py(self.sc, self._java_matrix.asDataFrame())
+
+
+class MLResults(object):
+    """
+    Wrapper around a Java ML Results object.
+
+    An SQLContext is created the first time an MLResults is constructed and
+    cached on the class as `MLResults.sqlContext`; later instances reuse it.
+
+    Parameters
+    ----------
+    results: JavaObject
+        A Java MLResults object as returned by calling `ml.execute()`.
+
+    sc: SparkContext
+        SparkContext
+    """
+    def __init__(self, results, sc):
+        self._java_results = results
+        self.sc = sc
+        # Probe for the cached context with getattr() instead of wrapping the
+        # construction in `try/except AttributeError`: that form would also
+        # swallow an AttributeError raised inside SQLContext(sc) and retry it.
+        if getattr(MLResults, 'sqlContext', None) is None:
+            MLResults.sqlContext = SQLContext(sc)
+
+    def __repr__(self):
+        return "MLResults"
+
+    @staticmethod
+    def _unwrap(outs):
+        """ Return the only element of a one-element list, else the list itself. """
+        return outs[0] if len(outs) == 1 else outs
+
+    def getNumPyArray(self, *outputs):
+        """
+        Fetch output matrices as NumPy arrays.
+
+        Parameters
+        ----------
+        outputs: string, list of strings
+            Output variables as defined inside the DML script.
+
+        Returns
+        -------
+        A single converted value when exactly one name is given, otherwise a
+        list with one converted value per name (empty if no name is given).
+        """
+        outs = []
+        for out in outputs:
+            block = self._java_results.getMatrix(out).asBinaryBlockMatrix().getMatrixBlock()
+            outs.append(convertToNumpyArr(self.sc, block))
+        return self._unwrap(outs)
+
+    def getDataFrame(self, *outputs):
+        """
+        Fetch outputs as PySpark SQL DataFrames.
+
+        Parameters
+        ----------
+        outputs: string, list of strings
+            Output variables as defined inside the DML script.
+
+        Returns
+        -------
+        A single DataFrame when exactly one name is given, otherwise a list
+        with one DataFrame per name (empty if no name is given).
+        """
+        outs = [DataFrame(self._java_results.getDataFrame(out), MLResults.sqlContext)
+                for out in outputs]
+        return self._unwrap(outs)
+
+    def get(self, *outputs):
+        """
+        Fetch outputs converted to Python objects via `_java2py`.
+
+        Parameters
+        ----------
+        outputs: string, list of strings
+            Output variables as defined inside the DML script.
+
+        Returns
+        -------
+        A single value when exactly one name is given, otherwise a list with
+        one value per name (empty if no name is given).
+        """
+        outs = [_java2py(self.sc, self._java_results.get(out)) for out in outputs]
+        return self._unwrap(outs)
+
+
+class Script(object):
+    """
+    A DML/PyDML script together with its registered inputs and outputs.
+
+    Parameters
+    ----------
+    scriptString: string
+        Can be either a file path to a DML script or a DML script itself.
+
+    scriptType: string
+        Script language, either "dml" for DML (R-like) or "pydml" for PyDML (Python-like).
+    """
+    def __init__(self, scriptString, scriptType="dml"):
+        self._input = {}
+        self._output = []
+        self.scriptString = scriptString
+        self.scriptType = scriptType
+
+    def input(self, *args, **kwargs):
+        """
+        Register input variables; returns `self` so calls can be chained.
+
+        Parameters
+        ----------
+        args: name, value tuple
+            where name is a string, and currently supported value formats
+            are double, string, dataframe, rdd, and list of such object.
+
+        kwargs: dict of name, value pairs
+            Same supported formats for name and value as for `args`.
+
+        Raises
+        ------
+        ValueError
+            If positional arguments are given but there are not exactly two.
+        """
+        if args:
+            if len(args) != 2:
+                raise ValueError("Expected name, value pair.")
+            name, value = args
+            self._input[name] = value
+        # Keyword inputs are applied after the positional pair.
+        self._input.update(kwargs)
+        return self
+
+    def output(self, *names):
+        """
+        Register output variables; returns `self` so calls can be chained.
+
+        Parameters
+        ----------
+        names: string, list of strings
+            Output variables as defined inside the DML script.
+        """
+        for name in names:
+            self._output.append(name)
+        return self
+
+
+class MLContext(object):
+    """
+    Wrapper around the new SystemML MLContext.
+
+    Parameters
+    ----------
+    sc: SparkContext
+        SparkContext
+
+    Raises
+    ------
+    ValueError
+        If sc is not a SparkContext.
+    """
+    def __init__(self, sc):
+        if not isinstance(sc, SparkContext):
+            # The message needs a %s placeholder; without one the % operator
+            # itself fails with a TypeError before ValueError can be raised.
+            raise ValueError("Expected sc to be a SparkContext, got %s" % type(sc))
+        self._sc = sc
+        self._ml = sc._jvm.org.apache.sysml.api.mlcontext.MLContext(sc._jsc)
+
+    def __repr__(self):
+        return "MLContext"
+
+    def execute(self, script):
+        """
+        Execute a DML / PyDML script.
+
+        A script string ending in ".dml" (for dml scripts) or ".pydml" (for
+        pydml scripts) is treated as a file path and must exist; any other
+        string is treated as the script source itself.
+
+        Parameters
+        ----------
+        script: Script instance
+            Script instance defined with the appropriate input and output variables.
+
+        Returns
+        -------
+        ml_results: MLResults
+            MLResults instance.
+
+        Raises
+        ------
+        ValueError
+            If script is not a Script, if its scriptType is neither "dml" nor
+            "pydml", or if it names a script file that does not exist.
+        """
+        if not isinstance(script, Script):
+            raise ValueError("Expected script to be an instance of Script")
+        script_type = script.scriptType
+        if script_type not in ("dml", "pydml"):
+            # Reject unknown types explicitly; otherwise script_java would be
+            # left unassigned and fail later with an UnboundLocalError.
+            raise ValueError("Unknown scriptType: %s (expected 'dml' or 'pydml')" % script_type)
+
+        scriptString = script.scriptString
+        is_file = scriptString.endswith("." + script_type)
+        if is_file and not os.path.exists(scriptString):
+            raise ValueError("path: %s does not exist" % scriptString)
+
+        factory = self._sc._jvm.org.apache.sysml.api.mlcontext.ScriptFactory
+        if script_type == "dml":
+            if is_file:
+                script_java = factory.dmlFromFile(scriptString)
+            else:
+                script_java = factory.dml(scriptString)
+        else:
+            if is_file:
+                script_java = factory.pydmlFromFile(scriptString)
+            else:
+                script_java = factory.pydml(scriptString)
+
+        for key, val in script._input.items():
+            # `in` is a reserved word ("keyword") in Python, so
+            # `script_java.in(...)` is not allowed. Therefore, we retrieve a
+            # function representing `script_java.in` and call it with the
+            # arguments. This is in lieu of adding a new `input` method on the
+            # JVM side, as that would complicate use from Scala/Java.
+            py4j.java_gateway.get_method(script_java, "in")(key, _py2java(self._sc, val))
+        for val in script._output:
+            script_java.out(val)
+        return MLResults(self._ml.execute(script_java), self._sc)
+
+
+__all__ = ['MLResults', 'MLContext', 'Script', 'dml', 'pydml']

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/542de374/src/main/python/systemml/mllearn/__init__.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mllearn/__init__.py 
b/src/main/python/systemml/mllearn/__init__.py
new file mode 100644
index 0000000..69cab58
--- /dev/null
+++ b/src/main/python/systemml/mllearn/__init__.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+from .estimators import *
+
+__all__ = estimators.__all__
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/542de374/src/main/python/systemml/mllearn/estimators.py
----------------------------------------------------------------------
diff --git a/src/main/python/systemml/mllearn/estimators.py 
b/src/main/python/systemml/mllearn/estimators.py
new file mode 100644
index 0000000..5d33d64
--- /dev/null
+++ b/src/main/python/systemml/mllearn/estimators.py
@@ -0,0 +1,302 @@
+#!/usr/bin/python
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+from pyspark.context import SparkContext 
+from pyspark.sql import DataFrame, SQLContext
+from pyspark.rdd import RDD
+import numpy as np
+import pandas as pd
+import sklearn as sk
+from pyspark.ml.feature import VectorAssembler
+from pyspark.mllib.linalg import Vectors
+from pyspark.ml import Estimator, Model
+
+from ..converters import *
+
+def assemble(sqlCtx, pdf, inputCols, outputCol):
+    """
+    Turn a Pandas DataFrame into a Spark DataFrame with an assembled vector column.
+
+    Parameters
+    ----------
+    sqlCtx: PySpark SQLContext used to create the Spark DataFrame
+    pdf: Pandas DataFrame to convert
+    inputCols: names of the columns to combine
+    outputCol: name of the vector column added by the VectorAssembler
+
+    Returns
+    -------
+    The Spark DataFrame produced by `VectorAssembler.transform`.
+    """
+    column_names = list(pdf.columns)
+    spark_df = sqlCtx.createDataFrame(pdf, column_names)
+    return VectorAssembler(inputCols=list(inputCols), outputCol=outputCol).transform(spark_df)
+
+class BaseSystemMLEstimator(Estimator):
+    """
+    Shared fit/predict logic for the SystemML estimators.
+
+    The methods here read `self.estimator` (the JVM estimator), `self.sqlCtx`,
+    `self.sc`, `self.transferUsingDF` and `self.setOutputRawPredictionsToFalse`,
+    which subclasses are expected to set in their constructors.
+    """
+    featuresCol = 'features'
+    labelCol = 'label'
+
+    def setFeaturesCol(self, colName):
+        """
+        Sets the default column name for features of PySpark DataFrame.
+
+        Parameters
+        ----------
+        colName: column name for features (default: 'features')
+        """
+        self.featuresCol = colName
+
+    def setLabelCol(self, colName):
+        """
+        Sets the default column name for labels of PySpark DataFrame.
+
+        Parameters
+        ----------
+        colName: column name for labels (default: 'label')
+        """
+        self.labelCol = colName
+
+    # Returns a model after calling fit(df) on Estimator object on JVM
+    def _fit(self, X):
+        """
+        Invokes the fit method on Estimator object on JVM if X is PySpark DataFrame
+
+        Parameters
+        ----------
+        X: PySpark DataFrame that contain the columns featuresCol (default: 'features')
+           and labelCol (default: 'label')
+
+        Returns
+        -------
+        self, with the fitted JVM model stored in `self.model`.
+        """
+        is_spark_df = hasattr(X, '_jdf')
+        if is_spark_df and self.featuresCol in X.columns and self.labelCol in X.columns:
+            self.model = self.estimator.fit(X._jdf)
+            return self
+        raise Exception('Incorrect usage: Expected dataframe as input with features/label as columns')
+
+    def fit(self, X, y=None, params=None):
+        """
+        Invokes the fit method on Estimator object on JVM if X and y are one of
+        the supported data types
+
+        When y is None, X is treated as a PySpark DataFrame and passed to `_fit`.
+
+        Parameters
+        ----------
+        X: NumPy ndarray, Pandas DataFrame, scipy sparse matrix
+        y: NumPy ndarray, Pandas DataFrame, scipy sparse matrix
+        params: accepted but not used by this implementation
+
+        Returns
+        -------
+        self, with the fitted JVM model stored in `self.model`.
+        """
+        if y is None:
+            return self._fit(X)
+        if not (isinstance(X, SUPPORTED_TYPES) and isinstance(y, SUPPORTED_TYPES)):
+            raise Exception('Unsupported input type')
+
+        if self.transferUsingDF:
+            pdfX = convertToPandasDF(X)
+            pdfY = convertToPandasDF(y)
+            if getNumCols(pdfY) != 1:
+                raise Exception('y should be a column vector')
+            if pdfX.shape[0] != pdfY.shape[0]:
+                raise Exception('Number of rows of X and y should match')
+            # Capture the feature column names before the label column is added.
+            colNames = pdfX.columns
+            # NOTE(review): this adds a column to pdfX in place; whether that can
+            # alias the caller's X depends on convertToPandasDF - confirm.
+            pdfX[self.labelCol] = pdfY[pdfY.columns[0]]
+            assembled = assemble(self.sqlCtx, pdfX, colNames, self.featuresCol)
+            df = assembled.select(self.featuresCol, self.labelCol)
+            self.model = self.estimator.fit(df._jdf)
+        else:
+            if getNumCols(y) != 1:
+                raise Exception('Expected y to be a column vector')
+            self.model = self.estimator.fit(convertToMatrixBlock(self.sc, X),
+                                            convertToMatrixBlock(self.sc, y))
+        if self.setOutputRawPredictionsToFalse:
+            self.model.setOutputRawPredictions(False)
+        return self
+
+    def transform(self, X):
+        """ Alias for `predict(X)`. """
+        return self.predict(X)
+
+    # Returns either a DataFrame or MatrixBlock after calling
+    # transform(X:MatrixBlock, y:MatrixBlock) on Model object on JVM
+    def predict(self, X):
+        """
+        Invokes the transform method on the fitted JVM model if X is one of
+        the supported data types
+
+        Parameters
+        ----------
+        X: NumPy ndarray, Pandas DataFrame, scipy sparse matrix or PySpark DataFrame
+
+        Returns
+        -------
+        For a PySpark DataFrame: the resulting DataFrame sorted by 'ID'.
+        With transferUsingDF: a flattened NumPy array if X is an ndarray,
+        otherwise a Pandas DataFrame holding the 'prediction' column.
+        Otherwise: the result of `convertToNumpyArr`.
+        """
+        if isinstance(X, SUPPORTED_TYPES):
+            if self.transferUsingDF:
+                pdfX = convertToPandasDF(X)
+                assembled = assemble(self.sqlCtx, pdfX, pdfX.columns, self.featuresCol)
+                df = assembled.select(self.featuresCol)
+                retDF = DataFrame(self.model.transform(df._jdf), self.sqlCtx)
+                retPDF = retDF.sort('ID').select('prediction').toPandas()
+                if isinstance(X, np.ndarray):
+                    # `.values` instead of `.as_matrix()`, which newer pandas
+                    # releases no longer provide.
+                    return retPDF.values.flatten()
+                return retPDF
+            retNumPy = convertToNumpyArr(self.sc,
+                                         self.model.transform(convertToMatrixBlock(self.sc, X)))
+            # TODO: Convert to Pandas when X is not a NumPy ndarray.
+            return retNumPy
+        if hasattr(X, '_jdf'):
+            if self.featuresCol in X.columns:
+                # No need to assemble as input DF is likely coming via MLPipeline
+                df = X
+            else:
+                assembler = VectorAssembler(inputCols=X.columns, outputCol=self.featuresCol)
+                df = assembler.transform(X)
+            retDF = DataFrame(self.model.transform(df._jdf), self.sqlCtx)
+            # Return DF
+            return retDF.sort('ID')
+        raise Exception('Unsupported input type')
+            
+class BaseSystemMLClassifier(BaseSystemMLEstimator):
+    """ Base class for the SystemML classifiers; adds accuracy scoring. """
+
+    def score(self, X, y):
+        """
+        Scores the predicted value with ground truth 'y'
+
+        Parameters
+        ----------
+        X: NumPy ndarray, Pandas DataFrame, scipy sparse matrix
+        y: NumPy ndarray, Pandas DataFrame, scipy sparse matrix
+
+        Returns
+        -------
+        The value of sklearn's `accuracy_score(y, self.predict(X))`.
+        """
+        # Import the submodule explicitly: a bare `import sklearn` does not
+        # guarantee that `sklearn.metrics` has been loaded.
+        from sklearn.metrics import accuracy_score
+        return accuracy_score(y, self.predict(X))
+
+class BaseSystemMLRegressor(BaseSystemMLEstimator):
+    """ Base class for the SystemML regressors; adds R^2 scoring. """
+
+    def score(self, X, y):
+        """
+        Scores the predicted value with ground truth 'y'
+
+        Parameters
+        ----------
+        X: NumPy ndarray, Pandas DataFrame, scipy sparse matrix
+        y: NumPy ndarray, Pandas DataFrame, scipy sparse matrix
+
+        Returns
+        -------
+        The value of sklearn's `r2_score(y, self.predict(X))` computed with
+        multioutput='variance_weighted'.
+        """
+        # Import the submodule explicitly: a bare `import sklearn` does not
+        # guarantee that `sklearn.metrics` has been loaded.
+        from sklearn.metrics import r2_score
+        return r2_score(y, self.predict(X), multioutput='variance_weighted')
+
+
+class LogisticRegression(BaseSystemMLClassifier):
+    def __init__(self, sqlCtx, penalty='l2', fit_intercept=True, max_iter=100,
+                 max_inner_iter=0, tol=0.000001, C=1.0, solver='newton-cg',
+                 transferUsingDF=False):
+        """
+        Performs both binomial and multinomial logistic regression.
+
+        Parameters
+        ----------
+        sqlCtx: PySpark SQLContext
+        penalty: Only 'l2' supported
+        fit_intercept: Specifies whether to add intercept or not (default: True)
+        max_iter: Maximum number of outer (Fisher scoring) iterations (default: 100)
+        max_inner_iter: Maximum number of inner (conjugate gradient) iterations,
+            or 0 if no maximum limit provided (default: 0)
+        tol: Tolerance used in the convergence criterion (default: 0.000001)
+        C: 1/regularization parameter (default: 1.0)
+        solver: Only 'newton-cg' solver supported
+        transferUsingDF: If True, NumPy/Pandas/scipy inputs are passed to the JVM
+            via a Spark DataFrame; otherwise they are converted to a MatrixBlock
+            (default: False)
+
+        Raises
+        ------
+        Exception
+            If C is not positive, penalty is not 'l2' or solver is not 'newton-cg'.
+        """
+        # Validate up front so no JVM estimator is created (and partly
+        # configured) for arguments that are going to be rejected anyway.
+        if C <= 0:
+            raise Exception('C has to be positive')
+        if penalty != 'l2':
+            raise Exception('Only l2 penalty is supported')
+        if solver != 'newton-cg':
+            raise Exception('Only newton-cg solver supported')
+        self.sqlCtx = sqlCtx
+        self.sc = sqlCtx._sc
+        self.uid = "logReg"
+        self.estimator = self.sc._jvm.org.apache.sysml.api.ml.LogisticRegression(
+            self.uid, self.sc._jsc.sc())
+        self.estimator.setMaxOuterIter(max_iter)
+        self.estimator.setMaxInnerIter(max_inner_iter)
+        # The JVM estimator takes the regularization parameter, i.e. 1/C.
+        self.estimator.setRegParam(1.0 / C)
+        self.estimator.setTol(tol)
+        self.estimator.setIcpt(int(fit_intercept))
+        self.transferUsingDF = transferUsingDF
+        self.setOutputRawPredictionsToFalse = True
+
+class LinearRegression(BaseSystemMLRegressor):
+
+    def __init__(self, sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001,
+                 C=1.0, solver='newton-cg', transferUsingDF=False):
+        """
+        Performs linear regression to model the relationship between one numerical
+        response variable and one or more explanatory (feature) variables.
+
+        Parameters
+        ----------
+        sqlCtx: PySpark SQLContext
+        fit_intercept: Specifies whether to add intercept or not (default: True)
+        max_iter: Maximum number of conjugate gradient iterations, or 0 if no
+            maximum limit provided (default: 100)
+        tol: Tolerance used in the convergence criterion (default: 0.000001)
+        C: 1/regularization parameter (default: 1.0)
+        solver: Supports either 'newton-cg' or 'direct-solve' (default: 'newton-cg').
+            Depending on the size and the sparsity of the feature matrix, one or
+            the other solver may be more efficient. 'direct-solve' solver is more
+            efficient when the number of features is relatively small (m < 1000)
+            and input matrix X is either tall or fairly dense; otherwise
+            'newton-cg' solver is more efficient.
+        transferUsingDF: If True, NumPy/Pandas/scipy inputs are passed to the JVM
+            via a Spark DataFrame; otherwise they are converted to a MatrixBlock
+            (default: False)
+
+        Raises
+        ------
+        Exception
+            If solver is not one of the supported values or C is not positive.
+        """
+        # Validate up front so no JVM estimator is created for rejected arguments.
+        if solver not in ('newton-cg', 'direct-solve'):
+            raise Exception('Only newton-cg and direct-solve solvers are supported')
+        if C <= 0:
+            raise Exception('C has to be positive')
+        self.sqlCtx = sqlCtx
+        self.sc = sqlCtx._sc
+        self.uid = "lr"
+        self.estimator = self.sc._jvm.org.apache.sysml.api.ml.LinearRegression(
+            self.uid, self.sc._jsc.sc(), solver)
+        self.estimator.setMaxIter(max_iter)
+        # The JVM estimator takes the regularization parameter, i.e. 1/C.
+        self.estimator.setRegParam(1.0 / C)
+        self.estimator.setTol(tol)
+        self.estimator.setIcpt(int(fit_intercept))
+        self.transferUsingDF = transferUsingDF
+        self.setOutputRawPredictionsToFalse = False
+
+
+class SVM(BaseSystemMLClassifier):
+
+    def __init__(self, sqlCtx, fit_intercept=True, max_iter=100, tol=0.000001,
+                 C=1.0, is_multi_class=False, transferUsingDF=False):
+        """
+        Performs both binary-class and multiclass SVM (Support Vector Machines).
+
+        Parameters
+        ----------
+        sqlCtx: PySpark SQLContext
+        fit_intercept: Specifies whether to add intercept or not (default: True)
+        max_iter: Maximum number iterations (default: 100)
+        tol: Tolerance used in the convergence criterion (default: 0.000001)
+        C: 1/regularization parameter (default: 1.0)
+        is_multi_class: Specifies whether to use binary-class SVM or multi-class
+            SVM algorithm (default: False)
+        transferUsingDF: If True, NumPy/Pandas/scipy inputs are passed to the JVM
+            via a Spark DataFrame; otherwise they are converted to a MatrixBlock
+            (default: False)
+        """
+        self.sqlCtx = sqlCtx
+        self.sc = sqlCtx._sc
+        self.uid = "svm"
+        jvm_ml = self.sc._jvm.org.apache.sysml.api.ml
+        self.estimator = jvm_ml.SVM(self.uid, self.sc._jsc.sc(), is_multi_class)
+        self.estimator.setMaxIter(max_iter)
+        if not C > 0:
+            raise Exception('C has to be positive')
+        # The JVM estimator takes the regularization parameter, i.e. 1/C.
+        self.estimator.setRegParam(1.0 / C)
+        self.estimator.setTol(tol)
+        self.estimator.setIcpt(int(fit_intercept))
+        self.setOutputRawPredictionsToFalse = False
+        self.transferUsingDF = transferUsingDF
+
+class NaiveBayes(BaseSystemMLClassifier):
+
+    def __init__(self, sqlCtx, laplace=1.0, transferUsingDF=False):
+        """
+        Classifier backed by the SystemML NaiveBayes estimator
+        (org.apache.sysml.api.ml.NaiveBayes).
+
+        Parameters
+        ----------
+        sqlCtx: PySpark SQLContext
+        laplace: Laplace smoothing specified by the user to avoid creation of
+            0 probabilities (default: 1.0)
+        transferUsingDF: If True, NumPy/Pandas/scipy inputs are passed to the JVM
+            via a Spark DataFrame; otherwise they are converted to a MatrixBlock
+            (default: False)
+        """
+        self.sqlCtx = sqlCtx
+        self.sc = sqlCtx._sc
+        self.uid = "nb"
+        jvm_ml = self.sc._jvm.org.apache.sysml.api.ml
+        self.estimator = jvm_ml.NaiveBayes(self.uid, self.sc._jsc.sc())
+        self.estimator.setLaplace(laplace)
+        self.setOutputRawPredictionsToFalse = False
+        self.transferUsingDF = transferUsingDF
+
+__all__ = ['LogisticRegression', 'LinearRegression', 'SVM', 'NaiveBayes']

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/542de374/src/main/python/tests/test_mlcontext.py
----------------------------------------------------------------------
diff --git a/src/main/python/tests/test_mlcontext.py 
b/src/main/python/tests/test_mlcontext.py
index 182a4d8..6a6f64e 100644
--- a/src/main/python/tests/test_mlcontext.py
+++ b/src/main/python/tests/test_mlcontext.py
@@ -23,7 +23,7 @@ import unittest
 
 from pyspark.context import SparkContext
 
-from SystemML import MLContext, dml, pydml
+from systemml import MLContext, dml, pydml
 
 sc = SparkContext()
 ml = MLContext(sc)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/542de374/src/main/python/tests/test_mllearn.py
----------------------------------------------------------------------
diff --git a/src/main/python/tests/test_mllearn.py 
b/src/main/python/tests/test_mllearn.py
index 22f798f..27b9813 100644
--- a/src/main/python/tests/test_mllearn.py
+++ b/src/main/python/tests/test_mllearn.py
@@ -20,7 +20,7 @@
 #
 #-------------------------------------------------------------
 from sklearn import datasets, neighbors
-from SystemML.mllearn import LogisticRegression, LinearRegression, SVM, 
NaiveBayes 
+from systemml.mllearn import LogisticRegression, LinearRegression, SVM, 
NaiveBayes 
 from pyspark.sql import SQLContext
 from pyspark.context import SparkContext
 import unittest

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/542de374/src/main/python/uploadToPyPI.sh
----------------------------------------------------------------------
diff --git a/src/main/python/uploadToPyPI.sh b/src/main/python/uploadToPyPI.sh
deleted file mode 100644
index c892f3d..0000000
--- a/src/main/python/uploadToPyPI.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-cd ../../..
-mvn clean package -P distribution
-tar -xzf target/systemml-*-SNAPSHOT.tar.gz -C src/main/python/SystemML
-
-cd src/main/python/SystemML
-mv systemml-*-incubating-SNAPSHOT SystemML-java
-
-cd ..
-echo "Preparing to upload to PyPI ...."
-python setup.py register sdist upload
-
-rm -r SystemML/SystemML-java
\ No newline at end of file

Reply via email to