Repository: incubator-systemml Updated Branches: refs/heads/master e3a75d141 -> 88ad73939
[SYSTEMML-1185] Updating MachineLearning Notebook Updating the image sizes back to the full 256x256x3 shape. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/88ad7393 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/88ad7393 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/88ad7393 Branch: refs/heads/master Commit: 88ad73939cc19278979c93b55ce394557535f788 Parents: be99410 Author: Mike Dusenberry <[email protected]> Authored: Thu Mar 9 22:34:16 2017 -0800 Committer: Mike Dusenberry <[email protected]> Committed: Thu Mar 9 22:35:56 2017 -0800 ---------------------------------------------------------------------- projects/breast_cancer/MachineLearning.ipynb | 170 +++++++++++++++++----- 1 file changed, 130 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/88ad7393/projects/breast_cancer/MachineLearning.ipynb ---------------------------------------------------------------------- diff --git a/projects/breast_cancer/MachineLearning.ipynb b/projects/breast_cancer/MachineLearning.ipynb index 9a11450..4999250 100644 --- a/projects/breast_cancer/MachineLearning.ipynb +++ b/projects/breast_cancer/MachineLearning.ipynb @@ -2,7 +2,10 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "# Predicting Breast Cancer Proliferation Scores with Apache Spark and Apache SystemML\n", "\n", @@ -12,7 +15,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "# Setup" ] @@ -21,7 +27,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -44,7 +52,9 @@ "cell_type": "code", 
"execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -53,7 +63,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "# Read in train & val data" ] @@ -62,13 +75,15 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "deletable": true, + "editable": true }, "outputs": [], "source": [ "# Settings\n", - "size=64\n", - "grayscale = True\n", + "size=256\n", + "grayscale = False\n", "c = 1 if grayscale else 3\n", "p = 0.01" ] @@ -77,12 +92,18 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ - "tr_sample_filename = os.path.join(\"data\", \"train_{}_sample_{}{}.parquet\".format(p, size, \"_grayscale\" if grayscale else \"\"))\n", - "val_sample_filename = os.path.join(\"data\", \"val_{}_sample_{}{}.parquet\".format(p, size, \"_grayscale\" if grayscale else \"\"))\n", + "if p < 1:\n", + " tr_sample_filename = os.path.join(\"data\", \"train_{}_sample_{}{}.parquet\".format(p, size, \"_grayscale\" if grayscale else \"\"))\n", + " val_sample_filename = os.path.join(\"data\", \"val_{}_sample_{}{}.parquet\".format(p, size, \"_grayscale\" if grayscale else \"\"))\n", + "else:  # p >= 1: use the non-sample files; bind the same names the loads below read\n", + " tr_sample_filename = \"train_{}{}.parquet\".format(size, \"_grayscale\" if grayscale else \"\")\n", + " val_sample_filename = \"val_{}{}.parquet\".format(size, \"_grayscale\" if grayscale else \"\")\n", "train_df = sqlContext.read.load(tr_sample_filename)\n", "val_df = sqlContext.read.load(val_sample_filename)\n", "train_df, val_df" @@ -92,7 +113,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -105,7 +128,9 @@ "cell_type": "code", 
"execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -116,7 +141,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "# Extract X and Y matrices" ] @@ -125,7 +153,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -140,7 +170,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "# Convert to SystemML Matrices\n", "Note: This allows for reuse of the matrices on multiple\n", @@ -154,7 +187,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -180,7 +215,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "# Trigger Caching (Optional)\n", "Note: This will take a while and is not necessary, but doing it\n", @@ -193,7 +231,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -211,7 +251,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "# Save Matrices (Optional)" ] @@ -220,7 +263,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true + "collapsed": true, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -236,21 +281,30 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "---" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + 
"editable": true + }, "source": [ "# Softmax Classifier" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "## Sanity Check: Overfit Small Portion" ] @@ -259,7 +313,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -286,7 +342,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "## Train" ] @@ -295,7 +354,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -321,7 +382,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "## Eval" ] @@ -330,7 +394,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -351,7 +417,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "---" ] @@ -359,7 +428,9 @@ { "cell_type": "markdown", "metadata": { - "collapsed": true + "collapsed": true, + "deletable": true, + "editable": true }, "source": [ "# LeNet-like ConvNet" @@ -367,7 +438,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "## Sanity Check: Overfit Small Portion" ] @@ -376,7 +450,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -407,7 +483,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "## Hyperparameter Search" ] @@ -416,7 
+495,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -461,7 +542,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "## Train" ] @@ -470,7 +554,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -500,7 +586,10 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "deletable": true, + "editable": true + }, "source": [ "## Eval" ] @@ -509,7 +598,9 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "collapsed": false, + "deletable": true, + "editable": true }, "outputs": [], "source": [ @@ -537,7 +628,6 @@ } ], "metadata": { - "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python",
