[SYSTEMML-1185] Updating Preprocessing Notebook: adding more aggressive filtering by utilizing optical density values, and effectively skipping the 1024x1024 tiles by generating tiles of the same size as the final "samples".
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/be994109 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/be994109 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/be994109 Branch: refs/heads/master Commit: be9941097ef88eae0bb221142fd76ae2231ac954 Parents: e3a75d1 Author: Mike Dusenberry <[email protected]> Authored: Thu Mar 9 22:33:13 2017 -0800 Committer: Mike Dusenberry <[email protected]> Committed: Thu Mar 9 22:35:56 2017 -0800 ---------------------------------------------------------------------- projects/breast_cancer/Preprocessing.ipynb | 44 ++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/be994109/projects/breast_cancer/Preprocessing.ipynb ---------------------------------------------------------------------- diff --git a/projects/breast_cancer/Preprocessing.ipynb b/projects/breast_cancer/Preprocessing.ipynb index 2c2cc41..e5690a9 100644 --- a/projects/breast_cancer/Preprocessing.ipynb +++ b/projects/breast_cancer/Preprocessing.ipynb @@ -320,6 +320,22 @@ }, "outputs": [], "source": [ + "def optical_density(tile):\n", + " \"\"\"\n", + " Convert a tile to optical density values.\n", + " \n", + " Args:\n", + " tile: A 3D NumPy array of shape (tile_size, tile_size, channels).\n", + " \n", + " Returns:\n", + " A 3D NumPy array of shape (tile_size, tile_size, channels) representing\n", + " optical density values.\n", + " \"\"\"\n", + " tile = tile.astype(np.float64)\n", + " #od = -np.log10(tile/255 + 1e-8)\n", + " od = -np.log((tile+1)/240)\n", + " return od\n", + "\n", "def keep_tile(tile_tuple, tile_size, tissue_threshold):\n", " \"\"\"\n", " Determine if a tile should be kept.\n", @@ -343,6 +359,9 @@ " \"\"\"\n", " slide_num, tile = tile_tuple\n", " if tile.shape[0:2] 
== (tile_size, tile_size):\n", + " tile_orig = tile\n", + " \n", + " # Check 1\n", " # Convert 3D RGB image to 2D grayscale image, from\n", " # 0 (dense tissue) to 1 (plain background).\n", " tile = rgb2gray(tile)\n", @@ -366,7 +385,22 @@ " tile = binary_fill_holes(tile)\n", " # Calculate percentage of tissue coverage.\n", " percentage = tile.mean()\n", - " return percentage >= tissue_threshold\n", + " check1 = percentage >= tissue_threshold\n", + " \n", + " # Check 2\n", + " # Convert to optical density values\n", + " tile = optical_density(tile_orig)\n", + " # Threshold at beta\n", + " beta = 0.15\n", + " tile = np.min(tile, axis=2) >= beta\n", + " # Apply morphology for same reasons as above.\n", + " tile = binary_closing(tile, disk(2))\n", + " tile = binary_dilation(tile, disk(2))\n", + " tile = binary_fill_holes(tile)\n", + " percentage = tile.mean()\n", + " check2 = percentage >= tissue_threshold\n", + " \n", + " return check1 and check2\n", " else:\n", " return False" ] @@ -620,6 +654,7 @@ " tile_indices = (slides.flatMap(\n", " lambda slide: process_slide(slide, folder, training, tile_size, overlap)))\n", " tile_indices = tile_indices.repartition(num_partitions)\n", + " tile_indices.cache()\n", " tiles = tile_indices.map(lambda tile_index: process_tile_index(tile_index, folder, training))\n", " filtered_tiles = tiles.filter(lambda tile: keep_tile(tile, tile_size, tissue_threshold))\n", " samples = filtered_tiles.flatMap(lambda tile: process_tile(tile, sample_size, grayscale))\n", @@ -705,7 +740,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": true, + "collapsed": false, "deletable": true, "editable": true }, @@ -717,7 +752,7 @@ "\n", "# Settings\n", "training = True\n", - "tile_size = 1024\n", + "tile_size = 256\n", "sample_size = 256\n", "grayscale = False\n", "num_partitions = 20000\n", @@ -799,7 +834,8 @@ "metadata": { "collapsed": false, "deletable": true, - "editable": true + "editable": true, + "scrolled": false }, 
"outputs": [], "source": [
