[SYSTEMML-1185] Updating Preprocessing Notebook

Adding more aggressive filtering by utilizing optical density values and
effectively skipping the 1024x1024 tiles by generating tiles of the
same size as the final "samples".


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/be994109
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/be994109
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/be994109

Branch: refs/heads/master
Commit: be9941097ef88eae0bb221142fd76ae2231ac954
Parents: e3a75d1
Author: Mike Dusenberry <[email protected]>
Authored: Thu Mar 9 22:33:13 2017 -0800
Committer: Mike Dusenberry <[email protected]>
Committed: Thu Mar 9 22:35:56 2017 -0800

----------------------------------------------------------------------
 projects/breast_cancer/Preprocessing.ipynb | 44 ++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/be994109/projects/breast_cancer/Preprocessing.ipynb
----------------------------------------------------------------------
diff --git a/projects/breast_cancer/Preprocessing.ipynb b/projects/breast_cancer/Preprocessing.ipynb
index 2c2cc41..e5690a9 100644
--- a/projects/breast_cancer/Preprocessing.ipynb
+++ b/projects/breast_cancer/Preprocessing.ipynb
@@ -320,6 +320,22 @@
    },
    "outputs": [],
    "source": [
+    "def optical_density(tile):\n",
+    "  \"\"\"\n",
+    "  Convert a tile to optical density values.\n",
+    "  \n",
+    "  Args:\n",
+    "    tile: A 3D NumPy array of shape (tile_size, tile_size, channels).\n",
+    "  \n",
+    "  Returns:\n",
+    "    A 3D NumPy array of shape (tile_size, tile_size, channels) representing\n",
+    "    optical density values.\n",
+    "  \"\"\"\n",
+    "  tile = tile.astype(np.float64)\n",
+    "  #od = -np.log10(tile/255 + 1e-8)\n",
+    "  od = -np.log((tile+1)/240)\n",
+    "  return od\n",
+    "\n",
     "def keep_tile(tile_tuple, tile_size, tissue_threshold):\n",
     "  \"\"\"\n",
     "  Determine if a tile should be kept.\n",
@@ -343,6 +359,9 @@
     "  \"\"\"\n",
     "  slide_num, tile = tile_tuple\n",
     "  if tile.shape[0:2] == (tile_size, tile_size):\n",
+    "    tile_orig = tile\n",
+    "    \n",
+    "    # Check 1\n",
     "    # Convert 3D RGB image to 2D grayscale image, from\n",
     "    # 0 (dense tissue) to 1 (plain background).\n",
     "    tile = rgb2gray(tile)\n",
@@ -366,7 +385,22 @@
     "    tile = binary_fill_holes(tile)\n",
     "    # Calculate percentage of tissue coverage.\n",
     "    percentage = tile.mean()\n",
-    "    return percentage >= tissue_threshold\n",
+    "    check1 = percentage >= tissue_threshold\n",
+    "    \n",
+    "    # Check 2\n",
+    "    # Convert to optical density values\n",
+    "    tile = optical_density(tile_orig)\n",
+    "    # Threshold at beta\n",
+    "    beta = 0.15\n",
+    "    tile = np.min(tile, axis=2) >= beta\n",
+    "    # Apply morphology for same reasons as above.\n",
+    "    tile = binary_closing(tile, disk(2))\n",
+    "    tile = binary_dilation(tile, disk(2))\n",
+    "    tile = binary_fill_holes(tile)\n",
+    "    percentage = tile.mean()\n",
+    "    check2 = percentage >= tissue_threshold\n",
+    "    \n",
+    "    return check1 and check2\n",
     "  else:\n",
     "    return False"
    ]
@@ -620,6 +654,7 @@
     "  tile_indices = (slides.flatMap(\n",
     "      lambda slide: process_slide(slide, folder, training, tile_size, overlap)))\n",
     "  tile_indices = tile_indices.repartition(num_partitions)\n",
+    "  tile_indices.cache()\n",
     "  tiles = tile_indices.map(lambda tile_index: process_tile_index(tile_index, folder, training))\n",
     "  filtered_tiles = tiles.filter(lambda tile: keep_tile(tile, tile_size, tissue_threshold))\n",
     "  samples = filtered_tiles.flatMap(lambda tile: process_tile(tile, sample_size, grayscale))\n",
@@ -705,7 +740,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "collapsed": true,
+    "collapsed": false,
     "deletable": true,
     "editable": true
    },
@@ -717,7 +752,7 @@
     "\n",
     "# Settings\n",
     "training = True\n",
-    "tile_size = 1024\n",
+    "tile_size = 256\n",
     "sample_size = 256\n",
     "grayscale = False\n",
     "num_partitions = 20000\n",
@@ -799,7 +834,8 @@
    "metadata": {
     "collapsed": false,
     "deletable": true,
-    "editable": true
+    "editable": true,
+    "scrolled": false
    },
    "outputs": [],
    "source": [

Reply via email to