systemml git commit: [MINOR] add 2 new DML examples to Jupyter notebook

reinwald Mon, 04 Jun 2018 16:55:27 -0700

Repository: systemml
Updated Branches:
  refs/heads/master d44b3280f -> 8084dc127



[MINOR] add 2 new DML examples to Jupyter notebook


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/8084dc12
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/8084dc12
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/8084dc12

Branch: refs/heads/master
Commit: 8084dc1276663c7dfaa8983b13efb0ec11e6ee1e
Parents: d44b328
Author: Berthold Reinwald <[email protected]>
Authored: Mon Jun 4 16:54:14 2018 -0700
Committer: Berthold Reinwald <[email protected]>
Committed: Mon Jun 4 16:54:14 2018 -0700

----------------------------------------------------------------------
 ...DML Tips and Tricks (aka Fun With DML).ipynb | 691 ++++++++++++++++---
 1 file changed, 576 insertions(+), 115 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/8084dc12/samples/jupyter-notebooks/DML
 Tips and Tricks (aka Fun With DML).ipynb
----------------------------------------------------------------------
diff --git a/samples/jupyter-notebooks/DML Tips and Tricks (aka Fun With 
DML).ipynb b/samples/jupyter-notebooks/DML Tips and Tricks (aka Fun With 
DML).ipynb
index b2d2fad..6dd096c 100644
--- a/samples/jupyter-notebooks/DML Tips and Tricks (aka Fun With DML).ipynb    
+++ b/samples/jupyter-notebooks/DML Tips and Tricks (aka Fun With DML).ipynb    
@@ -4,13 +4,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "1. [Replace NaN with mode](#NaN2Mode)\n",
+    "1. [Create all value pairs for v1 and v2](#AllValuePairs)\n",
+    "* [Replace NaN with mode](#NaN2Mode)\n",
     "* [Use sample builtin function to create sample from matrix](#sample)\n",
     "* [Count of Matching Values in two Matrices/Vectors](#MatchinRows)\n",
     "* [Cross Validation](#CrossValidation)\n",
     "* [Value-based join of two Matrices](#JoinMatrices)\n",
     "* [Filter Matrix to include only Frequent Column 
Values](#FilterMatrix)\n",
-    "* [Construct (sparse) Matrix from (rowIndex, colIndex, values) 
triplets](#Construct_sparse_Matrix)\n",
+    "* [(Sparse) Matrix to/from (rowIndex, colIndex, values) conversions 
(i,j,v)](#Construct_sparse_Matrix)\n",
     "* [Find and remove duplicates in columns or 
rows](#Find_and_remove_duplicates)\n",
     "* [Set based Indexing](#Set_based_Indexing)\n",
     "* [Group by Aggregate using Linear Algebra](#Multi_column_Sorting)\n",
@@ -20,25 +21,95 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
    "metadata": {
     "collapsed": false,
     "scrolled": false
    },
+   "outputs": [],
+   "source": [
+    "from systemml import MLContext, dml\n",
+    "ml = MLContext(sc)\n",
+    "\n",
+    "print (ml.buildTime())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create all value pairs for v1 and v2 <a id=\"AllValuePairs\"></a>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {
+    "collapsed": false
+   },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2017-09-22 07:57:57 UTC\n"
+      "2.000\n",
+      "1.000\n",
+      "8.000\n",
+      "3.000\n",
+      "5.000\n",
+      "6.000\n",
+      "7.000\n",
+      "\n",
+      "80.000\n",
+      "20.000\n",
+      "50.000\n",
+      "\n",
+      "2.000 80.000\n",
+      "2.000 20.000\n",
+      "2.000 50.000\n",
+      "1.000 80.000\n",
+      "1.000 20.000\n",
+      "1.000 50.000\n",
+      "8.000 80.000\n",
+      "8.000 20.000\n",
+      "8.000 50.000\n",
+      "3.000 80.000\n",
+      "3.000 20.000\n",
+      "3.000 50.000\n",
+      "5.000 80.000\n",
+      "5.000 20.000\n",
+      "5.000 50.000\n",
+      "6.000 80.000\n",
+      "6.000 20.000\n",
+      "6.000 50.000\n",
+      "7.000 80.000\n",
+      "7.000 20.000\n",
+      "7.000 50.000\n",
+      "\n",
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.000 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
      ]
     }
    ],
    "source": [
-    "from systemml import MLContext, dml, jvm_stdout\n",
-    "ml = MLContext(sc)\n",
-    "\n",
-    "print (ml.buildTime())"
+    "prog=\"\"\"\n",
+    "v1 = matrix ('2 1 8 3 5 6 7', rows = 7, cols = 1 )\n",
+    "v2 = matrix ('80 20 50', rows = 3, cols = 1 )\n",
+    "\n",
+    "nv1 = nrow (v1);\n",
+    "nv2 = nrow (v2);\n",
+    "R = cbind (\n",
+    "  matrix (v1 %*% matrix(1, 1, nv2), nv1*nv2, 1),\n",
+    "  matrix (matrix(1, nv1, 1) %*% t(v2), nv1*nv2, 1))\n",
+    "\n",
+    "print(toString(v1));\n",
+    "print(toString(v2));\n",
+    "print(toString(R));\n",
+    "\"\"\"\n",
+    "res = ml.execute(dml(prog))"
    ]
   },
   {
@@ -52,12 +123,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "This functions replaces NaN in column with mode of column"
+    "This function replaces NaN in column i with the mode of column i."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 51,
    "metadata": {
     "collapsed": false
    },
@@ -94,7 +165,8 @@
     "replaceNaNwithMode = function (matrix[double] X, integer colId) \n",
     "      return (matrix[double] X) \n",
     "{\n",
-    "   Xi = replace (target=X[,colId], pattern=0/0, 
replacement=max(X[,colId])+1)   # replace NaN with largest value + 1\n",
+    "   Xi = replace (target=X[,colId], pattern=NaN, replacement=-Inf)         
      # replace NaN with -Inf\n",
+    "   Xi = replace (target=Xi, pattern=-Inf, replacement=max(Xi)+1)          
      # replace -Inf with largest value + 1\n",
     "   agg = aggregate (target=Xi, groups=Xi, fn=\"count\")                   
        # count each distinct value\n",
     "   mode = as.scalar (rowIndexMax(t(agg[1:nrow(agg)-1, ])))                
      # mode is max frequent value except last value\n",
     "   X[,colId] = replace (target=Xi, pattern=max(Xi), replacement=mode)     
      # fill in mode\n",
@@ -107,8 +179,7 @@
     "print (\"Before: \\n\" + toString(X))\n",
     "print (\"After: \\n\" + toString(Y))\n",
     "\"\"\"\n",
-    "with jvm_stdout(True):\n",
-    "    ml.execute(dml(prog))"
+    "res = ml.execute(dml(prog))"
    ]
   },
   {
@@ -127,7 +198,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 52,
    "metadata": {
     "collapsed": false
    },
@@ -145,14 +216,14 @@
       "\n",
       "sv: \n",
       "1.000\n",
-      "4.000\n",
+      "5.000\n",
       "\n",
       "samples: \n",
       "2.000 1.000\n",
-      "7.000 9.000\n",
+      "4.000 4.000\n",
       "\n",
       "SystemML Statistics:\n",
-      "Total execution time:\t\t0.001 sec.\n",
+      "Total execution time:\t\t0.000 sec.\n",
       "Number of executed Spark inst:\t0.\n",
       "\n",
       "\n"
@@ -174,8 +245,7 @@
     "print (\"sv: \\n\" + toString(sv))\n",
     "print (\"samples: \\n\" + toString(samples))\n",
     "\"\"\"\n",
-    "with jvm_stdout(True):\n",
-    "    ml.execute(dml(prog))"
+    "res = ml.execute(dml(prog))"
    ]
   },
   {
@@ -194,7 +264,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 53,
    "metadata": {
     "collapsed": false
    },
@@ -210,7 +280,7 @@
       "Number of Matches: 2.0\n",
       "\n",
       "SystemML Statistics:\n",
-      "Total execution time:\t\t0.001 sec.\n",
+      "Total execution time:\t\t0.000 sec.\n",
       "Number of executed Spark inst:\t0.\n",
       "\n",
       "\n"
@@ -228,8 +298,7 @@
     "print (\"t(Y): \" + toString(t(Y)))\n",
     "print (\"Number of Matches: \" + matches + \"\\n\")\n",
     "\"\"\"\n",
-    "with jvm_stdout(True):\n",
-    "    ml.execute(dml(prog))"
+    "res = ml.execute(dml(prog))"
    ]
   },
   {
@@ -248,82 +317,82 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 54,
    "metadata": {
-    "collapsed": true
+    "collapsed": false
    },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Test data Xyi2\n",
+      "Test data Xyi1\n",
+      "7.000 8.000 9.000 3.000\n",
       "10.000 11.000 12.000 4.000\n",
-      "16.000 17.000 18.000 6.000\n",
       "\n",
-      "Train data Xyni2\n",
+      "Train data Xyni1\n",
       "1.000 2.000 3.000 1.000\n",
       "4.000 5.000 6.000 2.000\n",
-      "7.000 8.000 9.000 3.000\n",
       "13.000 14.000 15.000 5.000\n",
+      "16.000 17.000 18.000 6.000\n",
       "\n",
-      "w2\n",
-      "95.000\n",
-      "106.000\n",
-      "117.000\n",
+      "w_1\n",
+      "170.000\n",
+      "184.000\n",
+      "198.000\n",
       "\n",
-      "stats2\n",
-      "8938.000\n",
+      "stats1\n",
+      "10537.000\n",
       "\n",
       "\n",
-      "Test data Xyi3\n",
-      "1.000 2.000 3.000 1.000\n",
-      "7.000 8.000 9.000 3.000\n",
+      "Test data Xyi2\n",
+      "13.000 14.000 15.000 5.000\n",
+      "16.000 17.000 18.000 6.000\n",
       "\n",
-      "Train data Xyni3\n",
+      "Train data Xyni2\n",
+      "1.000 2.000 3.000 1.000\n",
       "4.000 5.000 6.000 2.000\n",
+      "7.000 8.000 9.000 3.000\n",
       "10.000 11.000 12.000 4.000\n",
-      "13.000 14.000 15.000 5.000\n",
-      "16.000 17.000 18.000 6.000\n",
       "\n",
-      "w3\n",
-      "209.000\n",
-      "226.000\n",
-      "243.000\n",
+      "w_2\n",
+      "70.000\n",
+      "80.000\n",
+      "90.000\n",
       "\n",
-      "stats3\n",
-      "6844.000\n",
+      "stats2\n",
+      "7469.000\n",
       "\n",
       "\n",
-      "Test data Xyi1\n",
+      "Test data Xyi3\n",
+      "1.000 2.000 3.000 1.000\n",
       "4.000 5.000 6.000 2.000\n",
-      "13.000 14.000 15.000 5.000\n",
       "\n",
-      "Train data Xyni1\n",
-      "1.000 2.000 3.000 1.000\n",
+      "Train data Xyni3\n",
       "7.000 8.000 9.000 3.000\n",
       "10.000 11.000 12.000 4.000\n",
+      "13.000 14.000 15.000 5.000\n",
       "16.000 17.000 18.000 6.000\n",
       "\n",
-      "w1\n",
-      "158.000\n",
-      "172.000\n",
-      "186.000\n",
+      "w_3\n",
+      "222.000\n",
+      "240.000\n",
+      "258.000\n",
       "\n",
-      "stats1\n",
-      "9853.000\n",
+      "stats3\n",
+      "5109.000\n",
       "\n",
       "\n",
       "SV selection vector:\n",
       "3.000\n",
-      "1.000\n",
       "3.000\n",
-      "2.000\n",
+      "1.000\n",
       "1.000\n",
       "2.000\n",
+      "2.000\n",
       "\n",
       "SystemML Statistics:\n",
-      "Total execution time:\t\t0.024 sec.\n",
+      "Total execution time:\t\t0.014 sec.\n",
       "Number of executed Spark inst:\t0.\n",
       "\n",
       "\n"
@@ -358,14 +427,14 @@
     "      distinctLabels = aggregate( target = Xyni[,1], groups = Xyni[,1], 
fn = \"count\")\n",
     "      if ( nrow(distinctLabels) > 1)\n",
     "      {\n",
-    "         wi = trainAlg (Xyni[ ,1:ncol(Xy)-1], Xyni[ ,ncol(Xy)])           
  # wi Model for i-th training data\n",
-    "         pi = testAlg  (Xyi [ ,1:ncol(Xy)-1], wi)                         
  # pi Prediction for i-th test data\n",
-    "         ei = evalPrediction (pi, Xyi[ ,ncol(Xy)])                        
  # stats[i,] evaluation of prediction of i-th fold\n",
-    "         stats[i,] = ei\n",
+    "         w_i = trainAlg (Xyni[ ,1:ncol(Xy)-1], Xyni[ ,ncol(Xy)])          
   # w_i Model for i-th training data\n",
+    "         p_i = testAlg  (Xyi [ ,1:ncol(Xy)-1], w_i)                       
   # p_i Prediction for i-th test data\n",
+    "         e_i = evalPrediction (p_i, Xyi[ ,ncol(Xy)])                      
   # stats[i,] evaluation of prediction of i-th fold\n",
+    "         stats[i,] = e_i\n",
     "    \n",
     "         print (  \"Test data Xyi\" + i + \"\\n\" + toString(Xyi)  \n",
     "               + \"\\nTrain data Xyni\" + i + \"\\n\" + toString(Xyni)  
\n",
-    "               + \"\\nw\" + i + \"\\n\" + toString(wi) \n",
+    "               + \"\\nw_\" + i + \"\\n\" + toString(w_i) \n",
     "               + \"\\nstats\" + i + \"\\n\" + toString(stats[i,]) \n",
     "               + \"\\n\")\n",
     "      }\n",
@@ -402,8 +471,7 @@
     "}\n",
     "\"\"\"\n",
     "\n",
-    "with jvm_stdout(True):\n",
-    "    ml.execute(dml(prog))"
+    "res = ml.execute(dml(prog))"
    ]
   },
   {
@@ -477,8 +545,7 @@
     "print (\"M2 \\n\" + toString(M2))\n",
     "print (\"M1[,2] joined with M2[,2], and return matching M1 rows\\n\" + 
toString(M12))\n",
     "\"\"\"\n",
-    "with jvm_stdout():\n",
-    "    ml.execute(dml(prog))"
+    "res = ml.execute(dml(prog))"
    ]
   },
   {
@@ -497,7 +564,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
+   "execution_count": 56,
    "metadata": {
     "collapsed": false
    },
@@ -546,15 +613,14 @@
     "print (toString(M))\n",
     "print (toString(fM))\n",
     "\"\"\"\n",
-    "with jvm_stdout():\n",
-    "    ml.execute(dml(prog))"
+    "res = ml.execute(dml(prog))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Construct (sparse) Matrix from (rowIndex, colIndex, values) triplets<a 
id=\"Construct_sparse_Matrix\"></a>"
+    "## (Sparse) Matrix to/from (rowIndex, colIndex, values) conversions 
(i,j,v) <a id=\"Construct_sparse_Matrix\"></a>"
    ]
   },
   {
@@ -566,21 +632,106 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 57,
    "metadata": {
-    "collapsed": false
+    "collapsed": false,
+    "scrolled": true
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.000 2.000 10.000\n",
+      "3.000 3.000 20.000\n",
+      "3.000 4.000 30.000\n",
+      "4.000 1.000 40.000\n",
+      "5.000 6.000 50.000\n",
+      "\n",
+      "0.000 10.000 0.000 0.000 0.000 0.000\n",
+      "0.000 0.000 0.000 0.000 0.000 0.000\n",
+      "0.000 0.000 20.000 30.000 0.000 0.000\n",
+      "40.000 0.000 0.000 0.000 0.000 0.000\n",
+      "0.000 0.000 0.000 0.000 0.000 50.000\n",
+      "\n",
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.001 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
    "source": [
     "prog = \"\"\"\n",
     "I = matrix (\"1 3 3 4 5\", rows = 5, cols = 1)\n",
     "J = matrix (\"2 3 4 1 6\", rows = 5, cols = 1)\n",
     "V = matrix (\"10 20 30 40 50\", rows = 5, cols = 1)\n",
     "\n",
+    "IJVs = cbind(I, J, V)\n",
+    "\n",
     "M = table (I, J, V)\n",
+    "\n",
+    "print (toString (IJVs))\n",
     "print (toString (M))\n",
     "\"\"\"\n",
-    "ml.execute(dml(prog).output('M')).get('M').toNumPy()"
+    "res = ml.execute(dml(prog).output('M')).get('M').toNumPy()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Given a sparse matrix, construct an ``<i,j,v>`` matrix with 3 columns: 
rowIndex, colIndex, and values."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "M:\n",
+      "0.000 23.000\n",
+      "10.000 0.000\n",
+      "18.000 0.000\n",
+      "0.000 20.000\n",
+      "\n",
+      "IJVs:\n",
+      "1.000 2.000 23.000\n",
+      "2.000 1.000 10.000\n",
+      "3.000 1.000 18.000\n",
+      "4.000 2.000 20.000\n",
+      "\n",
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.001 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "prog = \"\"\"\n",
+    "M = matrix (\"0 23  10 0 18 0 0 20\", rows = 4, cols = 2)\n",
+    "\n",
+    "m = nrow(M);\n",
+    "n = ncol(M);\n",
+    "I = matrix((M!=0)*seq(1,m), m*n, 1)\n",
+    "J = matrix((M!=0)*t(seq(1,n)), m*n, 1)\n",
+    "V = matrix(M, m*n, 1)\n",
+    "IJVd = cbind(I, J, V);\n",
+    "IJVs = removeEmpty(target=IJVd, margin=\"rows\");\n",
+    "\n",
+    "print (\"M:\\n\" + toString(M))\n",
+    "print (\"IJVs:\\n\" + toString (IJVs))\n",
+    "\"\"\"\n",
+    "res = ml.execute(dml(prog).output('M')).get('M').toNumPy()"
    ]
   },
   {
@@ -599,11 +750,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 59,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.000 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "array([[  1.],\n",
+       "       [  2.],\n",
+       "       [  3.],\n",
+       "       [  4.],\n",
+       "       [  5.],\n",
+       "       [ 10.]])"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "prog = \"\"\"\n",
     "X = matrix (\"1 2 3 3 3 4 5 10\", rows = 8, cols = 1)\n",
@@ -623,11 +801,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 60,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.076 sec.\n",
+      "Number of executed Spark inst:\t6.\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "array([[  1.],\n",
+       "       [  2.],\n",
+       "       [  3.],\n",
+       "       [  4.],\n",
+       "       [  5.],\n",
+       "       [ 10.]])"
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "prog = \"\"\"\n",
     "X = matrix (\"3 2 1 3 3 4 5 10\", rows = 8, cols = 1)\n",
@@ -647,11 +852,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 61,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.000 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "array([[  1.],\n",
+       "       [  2.],\n",
+       "       [  3.],\n",
+       "       [  4.],\n",
+       "       [  5.],\n",
+       "       [ 10.]])"
+      ]
+     },
+     "execution_count": 61,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "prog = \"\"\"\n",
     "X = matrix (\"3 2 1 3 3 4 5 10\", rows = 8, cols = 1)\n",
@@ -680,11 +912,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 62,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 11.000 1.000 
1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 11.000 1.000 1.000 1.000 1.000 
11.000 11.000 1.000 11.000 1.000 1.000 11.000 1.000 1.000 1.000 1.000 1.000 
1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 
11.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 
1.000 1.000 1.000 1.000 11.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 
1.000 1.000 1.000 11.000\n",
+      "\n",
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.001 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "array([[  1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,  11.,   
1.,\n",
+       "          1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,  11.,   1.,   
1.,\n",
+       "          1.,   1.,  11.,  11.,   1.,  11.,   1.,   1.,  11.,   1.,   
1.,\n",
+       "          1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   
1.,\n",
+       "          1.,   1.,   1.,   1.,   1.,  11.,   1.,   1.,   1.,   1.,   
1.,\n",
+       "          1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   
1.,\n",
+       "         11.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   1.,   
1.,\n",
+       "          1.,  11.]])"
+      ]
+     },
+     "execution_count": 62,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "prog = \"\"\"\n",
     "X = matrix (1, rows = 1, cols = 100)\n",
@@ -715,11 +978,52 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 63,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.002 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n",
+      "[[  4.  10.  33.]\n",
+      " [  8.  20.  74.]\n",
+      " [  3.  20.  49.]\n",
+      " [  6.  20.  29.]\n",
+      " [  5.  30.  94.]\n",
+      " [  9.  30.  57.]\n",
+      " [  7.  40.  48.]\n",
+      " [  2.  40.  11.]\n",
+      " [  1.  50.  20.]]\n",
+      "[[ 10.]\n",
+      " [ 20.]\n",
+      " [ 30.]\n",
+      " [ 40.]\n",
+      " [ 50.]]\n",
+      "[[ 33.]\n",
+      " [ 74.]\n",
+      " [ 94.]\n",
+      " [ 48.]\n",
+      " [ 20.]]\n",
+      "[[ 0.  0.  0.  1.  0.  0.  0.  0.  0.]\n",
+      " [ 0.  0.  1.  0.  0.  1.  0.  1.  0.]\n",
+      " [ 0.  0.  0.  0.  1.  0.  0.  0.  1.]\n",
+      " [ 0.  1.  0.  0.  0.  0.  1.  0.  0.]\n",
+      " [ 1.  0.  0.  0.  0.  0.  0.  0.  0.]]\n",
+      "[[  33.]\n",
+      " [ 152.]\n",
+      " [ 151.]\n",
+      " [  59.]\n",
+      " [  20.]]\n"
+     ]
+    }
+   ],
    "source": [
     "prog = \"\"\"\n",
     "C = matrix ('50 40 20 10 30 20 40 20 30', rows = 9, cols = 1)             
                 # category data\n",
@@ -782,7 +1086,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 64,
    "metadata": {
     "collapsed": true
    },
@@ -815,11 +1119,43 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 65,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.000\n",
+      "3.000\n",
+      "6.000\n",
+      "4.000\n",
+      "9.000\n",
+      "15.000\n",
+      "22.000\n",
+      "8.000\n",
+      "17.000\n",
+      "\n",
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.001 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "MLResults"
+      ]
+     },
+     "execution_count": 65,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "prog = cumsum_prod_def + \"\"\"\n",
     "X = matrix (\"1 2 3 4 5 6 7 8 9\", rows = 9, cols = 1);\n",
@@ -831,8 +1167,7 @@
     "\n",
     "print (toString(Y))\n",
     "\"\"\"\n",
-    "with jvm_stdout():\n",
-    "    ml.execute(dml(prog))"
+    "ml.execute(dml(prog))"
    ]
   },
   {
@@ -844,11 +1179,43 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 66,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "1.000\n",
+      "1.000\n",
+      "1.000\n",
+      "4.000\n",
+      "4.000\n",
+      "4.000\n",
+      "4.000\n",
+      "8.000\n",
+      "8.000\n",
+      "\n",
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.001 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "MLResults"
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "prog = cumsum_prod_def + \"\"\"\n",
     "X = matrix (\"1 2 3 4 5 6 7 8 9\", rows = 9, cols = 1);\n",
@@ -860,8 +1227,7 @@
     "\n",
     "print (toString(Y))\n",
     "\"\"\"\n",
-    "with jvm_stdout():\n",
-    "    ml.execute(dml(prog))"
+    "ml.execute(dml(prog))"
    ]
   },
   {
@@ -873,7 +1239,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 67,
    "metadata": {
     "collapsed": true
    },
@@ -902,11 +1268,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 68,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t6.081 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "MLResults"
+      ]
+     },
+     "execution_count": 68,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "prog = cumsum_prod_def + cumsum_prod_naive_def + \"\"\"\n",
     "X = rand (rows = 20000, cols = 10, min = 0, max = 1, pdf = \"uniform\", 
sparsity = 1.0);\n",
@@ -914,17 +1302,38 @@
     "\n",
     "Y1 = cumsum_prod_naive (X, C, 0.123);\n",
     "\"\"\"\n",
-    "with jvm_stdout():\n",
-    "    ml.execute(dml(prog))"
+    "ml.execute(dml(prog))"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 69,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.074 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "MLResults"
+      ]
+     },
+     "execution_count": 69,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "prog = cumsum_prod_def + cumsum_prod_naive_def + \"\"\"\n",
     "X = rand (rows = 20000, cols = 10, min = 0, max = 1, pdf = \"uniform\", 
sparsity = 1.0);\n",
@@ -932,8 +1341,7 @@
     "\n",
     "Y2 = cumsum_prod (X, C, 0.123);\n",
     "\"\"\"\n",
-    "with jvm_stdout():\n",
-    "    ml.execute(dml(prog))"
+    "ml.execute(dml(prog))"
    ]
   },
   {
@@ -984,7 +1392,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 70,
    "metadata": {
     "collapsed": true
    },
@@ -1023,11 +1431,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 71,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Maximum difference between X %*% L and Identity = 
2.220446049250313E-16\n",
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t0.309 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "MLResults"
+      ]
+     },
+     "execution_count": 71,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "prog =  invert_lower_triangular_def + \"\"\"\n",
     "n = 1000;\n",
@@ -1040,8 +1471,7 @@
     "\n",
     "print (\"Maximum difference between X %*% L and Identity = \" + max (abs 
(X %*% L - diag (matrix (1, rows = n, cols = 1)))));\n",
     "\"\"\"\n",
-    "with jvm_stdout():\n",
-    "    ml.execute(dml(prog))"
+    "ml.execute(dml(prog))"
    ]
   },
   {
@@ -1053,7 +1483,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 72,
    "metadata": {
     "collapsed": true
    },
@@ -1084,11 +1514,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 73,
    "metadata": {
     "collapsed": false
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Maximum difference between X %*% L and Identity = 
4.718447854656915E-16\n",
+      "SystemML Statistics:\n",
+      "Total execution time:\t\t6.890 sec.\n",
+      "Number of executed Spark inst:\t0.\n",
+      "\n",
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "MLResults"
+      ]
+     },
+     "execution_count": 73,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "prog =  invert_lower_triangular_naive_def + \"\"\"\n",
     "n = 1000;\n",
@@ -1101,9 +1554,17 @@
     "\n",
     "print (\"Maximum difference between X %*% L and Identity = \" + max (abs 
(X %*% L - diag (matrix (1, rows = n, cols = 1)))));\n",
     "\"\"\"\n",
-    "with jvm_stdout():\n",
-    "    ml.execute(dml(prog))"
+    "ml.execute(dml(prog))"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

systemml git commit: [MINOR] add 2 new DML examples to Jupyter notebook

Reply via email to