This is an automated email from the ASF dual-hosted git repository.

okislal pushed a commit to branch asf-site
in repository https://gitbox.apache.org/repos/asf/madlib-site.git


The following commit(s) were added to refs/heads/asf-site by this push:
     new 9a96aa0  MADlib image loader:  load dataset from disk
9a96aa0 is described below

commit 9a96aa0b68052b12a1ff6b416758501aa1a9606e
Author: Domino Valdano <dvald...@pivotal.io>
AuthorDate: Tue Jul 16 14:29:47 2019 -0700

    MADlib image loader:  load dataset from disk
    
    Adds the ability to load images directly from disk to the
    madlib_image_loader module (instead of requiring them to already be
    in numpy arrays).
    
    The new functionality can be invoked either as a module or from
    the command line; an example invocation is shown below.  (Loading
    from a numpy array is still supported only when called as a
    module.)
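
    For example, the command-line form (documented in the usage text
    further below) might be invoked roughly like this; the root
    directory and table name here are only placeholders:

        python madlib_image_loader.py -r /path/to/images -w 5 my_image_table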
    
    The dataset to be loaded is expected to be organized by image
    category/class, as in the sketch below: each directory name labels
    a specific class, and only images belonging to that class should be
    in that directory.
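
    A hypothetical layout (directory and file names are only
    illustrative):

        /path/to/images/
            cats/
                cat001.jpg
                cat002.jpg
            dogs/
                dog001.jpg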
    
    When used this way, an additional img_name column is added to the
    table to help track which image is which later on.
    
    The Madlib Image Loader Demo notebook has been updated to reflect
    a minor interface change (the original API endpoint has been
    renamed to load_dataset_from_np) and to add an example of loading
    from disk with the newly added endpoint load_dataset_from_disk.
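
    A minimal sketch of the two module-level calls, assuming the
    default DbCredentials() and numpy arrays data_x/data_y that have
    already been loaded (the table names and paths below are only
    illustrative):

        from madlib_image_loader import ImageLoader, DbCredentials

        db_creds = DbCredentials()   # defaults: localhost:5432, madlib db
        iloader = ImageLoader(num_workers=5, db_creds=db_creds)

        # Load from numpy arrays (formerly load_np_array_to_table)
        iloader.load_dataset_from_np(data_x, data_y, 'cifar_10_test', append=False)

        # Load directly from disk; each subdirectory of the root is one class
        iloader.load_dataset_from_disk('/tmp/image_test/cifar10', 'fromdisk_test')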
    
    Closes #16
    
    Co-authored-by:  Nikhil Kak <n...@pivotal.io>
---
 .../Deep-learning/Madlib Image Loader Demo.ipynb   | 482 ++++++++++---------
 .../Deep-learning/madlib_image_loader.py           | 514 ++++++++++++++++-----
 2 files changed, 639 insertions(+), 357 deletions(-)

diff --git a/community-artifacts/Deep-learning/Madlib Image Loader Demo.ipynb 
b/community-artifacts/Deep-learning/Madlib Image Loader Demo.ipynb
index 07570f6..aba3a1f 100644
--- a/community-artifacts/Deep-learning/Madlib Image Loader Demo.ipynb  
+++ b/community-artifacts/Deep-learning/Madlib Image Loader Demo.ipynb  
@@ -35,11 +35,11 @@
    "outputs": [],
    "source": [
     "# Add community-artifacts to PYTHON_PATH\n",
-    "     # TIP:  You can skip this cell if working directory of notebook is 
community-artifacts\n",
+    "     # TIP:  You can skip this cell if madlib_image_loader.py is already 
in your current directory\n",
     "\n",
     "home = %env HOME\n",
     "     # TIP:  Change home,'workspace' to wherever you have cloned 
madlib-site repo\n",
-    "madlib_site_dir = 
os.path.join(home,'workspace','madlib-site','community-artifacts')\n",
+    "madlib_site_dir = 
os.path.join(home,'workspace','madlib-site','community-artifacts', 
'Deep-learning')\n",
     "sys.path.append(madlib_site_dir)"
    ]
   },
@@ -85,7 +85,7 @@
    "outputs": [],
    "source": [
     "# Initialize ImageLoader (increase num_workers to run faster)\n",
-    "iloader = ImageLoader(num_workers=5, db_creds=db_creds)"
+    "iloader = ImageLoader(num_workers=8, db_creds=db_creds)"
    ]
   },
   {
@@ -98,241 +98,174 @@
      "output_type": "stream",
      "text": [
       "MainProcess: Connected to madlib db.\n",
-      "Appending to table cifar_10_test in madlib db\n",
-      "Spawning 5 workers...\n",
-      "Initializing PoolWorker-1 [pid 240]\n",
-      "PoolWorker-1: Created temporary directory PoolWorker-1\n",
-      "Initializing PoolWorker-2 [pid 241]\n",
-      "PoolWorker-2: Created temporary directory PoolWorker-2\n",
-      "Initializing PoolWorker-3 [pid 242]\n",
-      "PoolWorker-3: Created temporary directory PoolWorker-3\n",
-      "Initializing PoolWorker-4 [pid 243]\n",
-      "PoolWorker-4: Created temporary directory PoolWorker-4\n",
-      "Initializing PoolWorker-5 [pid 245]\n",
+      "Executing: CREATE TABLE cifar_10_test (id SERIAL, x REAL[], y TEXT)\n",
+      "CREATE TABLE\n",
+      "Created table cifar_10_test in madlib db\n",
+      "Spawning 8 workers...\n",
+      "Initializing PoolWorker-1 [pid 13756]\n",
+      "PoolWorker-1: Created temporary directory /tmp/madlib_l1UVaZIxxr\n",
+      "Initializing PoolWorker-2 [pid 13757]\n",
+      "PoolWorker-2: Created temporary directory /tmp/madlib_AQTX6xmhMK\n",
+      "Initializing PoolWorker-3 [pid 13758]\n",
+      "PoolWorker-3: Created temporary directory /tmp/madlib_7QOFnBn8jB\n",
+      "Initializing PoolWorker-4 [pid 13759]\n",
+      "PoolWorker-4: Created temporary directory /tmp/madlib_1AgOTkwfPn\n",
+      "Initializing PoolWorker-5 [pid 13761]\n",
       "PoolWorker-1: Connected to madlib db.\n",
-      "PoolWorker-5: Created temporary directory PoolWorker-5\n",
+      "Initializing PoolWorker-6 [pid 13762]\n",
+      "PoolWorker-5: Created temporary directory /tmp/madlib_RzYo51sdaR\n",
+      "Initializing PoolWorker-7 [pid 13764]\n",
+      "PoolWorker-6: Created temporary directory /tmp/madlib_uip4DCxC9m\n",
       "PoolWorker-2: Connected to madlib db.\n",
+      "PoolWorker-7: Created temporary directory /tmp/madlib_8Urhpm5TkR\n",
+      "Initializing PoolWorker-8 [pid 13766]\n",
       "PoolWorker-3: Connected to madlib db.\n",
+      "PoolWorker-8: Created temporary directory /tmp/madlib_SzPIAtCqyb\n",
       "PoolWorker-4: Connected to madlib db.\n",
       "PoolWorker-5: Connected to madlib db.\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0000.tmp\n",
-      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0000.tmp\n",
-      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0000.tmp\n",
-      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0000.tmp\n",
-      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0000.tmp\n",
+      "PoolWorker-6: Connected to madlib db.\n",
+      "PoolWorker-7: Connected to madlib db.\n",
+      "PoolWorker-8: Connected to madlib db.\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_l1UVaZIxxr/cifar_10_test0000.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_AQTX6xmhMK/cifar_10_test0000.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_7QOFnBn8jB/cifar_10_test0000.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_1AgOTkwfPn/cifar_10_test0000.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_RzYo51sdaR/cifar_10_test0000.tmp\n",
+      "PoolWorker-6: Wrote 1000 images to 
/tmp/madlib_uip4DCxC9m/cifar_10_test0000.tmp\n",
+      "PoolWorker-7: Wrote 1000 images to 
/tmp/madlib_8Urhpm5TkR/cifar_10_test0000.tmp\n",
+      "PoolWorker-8: Wrote 1000 images to 
/tmp/madlib_SzPIAtCqyb/cifar_10_test0000.tmp\n",
       "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0001.tmp\n",
-      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0001.tmp\n",
-      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0001.tmp\n",
-      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0001.tmp\n",
-      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0001.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_l1UVaZIxxr/cifar_10_test0001.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_AQTX6xmhMK/cifar_10_test0001.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_1AgOTkwfPn/cifar_10_test0001.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_7QOFnBn8jB/cifar_10_test0001.tmp\n",
       "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Wrote 1000 images to 
/tmp/madlib_uip4DCxC9m/cifar_10_test0001.tmp\n",
+      "PoolWorker-7: Wrote 1000 images to 
/tmp/madlib_8Urhpm5TkR/cifar_10_test0001.tmp\n",
+      "PoolWorker-8: Wrote 1000 images to 
/tmp/madlib_SzPIAtCqyb/cifar_10_test0001.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_RzYo51sdaR/cifar_10_test0001.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_l1UVaZIxxr/cifar_10_test0002.tmp\n",
       "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0002.tmp\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0002.tmp\n",
-      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0002.tmp\n",
-      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0002.tmp\n",
-      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0002.tmp\n",
-      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0003.tmp\n",
-      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0003.tmp\n",
-      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0003.tmp\n",
-      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0003.tmp\n",
-      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0003.tmp\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_AQTX6xmhMK/cifar_10_test0002.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_1AgOTkwfPn/cifar_10_test0002.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_7QOFnBn8jB/cifar_10_test0002.tmp\n",
+      "PoolWorker-6: Wrote 1000 images to 
/tmp/madlib_uip4DCxC9m/cifar_10_test0002.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_RzYo51sdaR/cifar_10_test0002.tmp\n",
+      "PoolWorker-7: Wrote 1000 images to 
/tmp/madlib_8Urhpm5TkR/cifar_10_test0002.tmp\n",
+      "PoolWorker-8: Wrote 1000 images to 
/tmp/madlib_SzPIAtCqyb/cifar_10_test0002.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_l1UVaZIxxr/cifar_10_test0003.tmp\n",
+      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0004.tmp\n",
-      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0004.tmp\n",
-      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0004.tmp\n",
-      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0004.tmp\n",
-      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0004.tmp\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0005.tmp\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0005.tmp\n",
-      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0005.tmp\n",
-      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0005.tmp\n",
-      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0005.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_AQTX6xmhMK/cifar_10_test0003.tmp\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_1AgOTkwfPn/cifar_10_test0003.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_7QOFnBn8jB/cifar_10_test0003.tmp\n",
+      "PoolWorker-8: Wrote 1000 images to 
/tmp/madlib_SzPIAtCqyb/cifar_10_test0003.tmp\n",
       "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Wrote 1000 images to 
/tmp/madlib_uip4DCxC9m/cifar_10_test0003.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_RzYo51sdaR/cifar_10_test0003.tmp\n",
+      "PoolWorker-7: Wrote 1000 images to 
/tmp/madlib_8Urhpm5TkR/cifar_10_test0003.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_l1UVaZIxxr/cifar_10_test0004.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_AQTX6xmhMK/cifar_10_test0004.tmp\n",
+      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0006.tmp\n",
-      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0006.tmp\n",
-      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0006.tmp\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0006.tmp\n",
-      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0006.tmp\n",
       "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_1AgOTkwfPn/cifar_10_test0004.tmp\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_7QOFnBn8jB/cifar_10_test0004.tmp\n",
+      "PoolWorker-6: Wrote 1000 images to 
/tmp/madlib_uip4DCxC9m/cifar_10_test0004.tmp\n",
+      "PoolWorker-7: Wrote 1000 images to 
/tmp/madlib_8Urhpm5TkR/cifar_10_test0004.tmp\n",
+      "PoolWorker-8: Wrote 1000 images to 
/tmp/madlib_SzPIAtCqyb/cifar_10_test0004.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_RzYo51sdaR/cifar_10_test0004.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_l1UVaZIxxr/cifar_10_test0005.tmp\n",
+      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_AQTX6xmhMK/cifar_10_test0005.tmp\n",
+      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_1AgOTkwfPn/cifar_10_test0005.tmp\n",
       "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0007.tmp\n",
-      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0007.tmp\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0007.tmp\n",
-      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0007.tmp\n",
-      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0007.tmp\n",
       "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0008.tmp\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0008.tmp\n",
-      "PoolWorker-2: Wrote 1000 images to 
/tmp/madlib_xlKP6JhnfV/cifar_10_test0008.tmp\n",
-      "PoolWorker-4: Wrote 1000 images to 
/tmp/madlib_hmXBkZ2Rd5/cifar_10_test0008.tmp\n",
-      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_4v2Q1jvkZs/cifar_10_test0008.tmp\n",
-      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_7QOFnBn8jB/cifar_10_test0005.tmp\n",
+      "PoolWorker-6: Wrote 1000 images to 
/tmp/madlib_uip4DCxC9m/cifar_10_test0005.tmp\n",
+      "PoolWorker-7: Wrote 1000 images to 
/tmp/madlib_8Urhpm5TkR/cifar_10_test0005.tmp\n",
+      "PoolWorker-8: Wrote 1000 images to 
/tmp/madlib_SzPIAtCqyb/cifar_10_test0005.tmp\n",
+      "PoolWorker-5: Wrote 1000 images to 
/tmp/madlib_RzYo51sdaR/cifar_10_test0005.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_l1UVaZIxxr/cifar_10_test0006.tmp\n",
       "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-4: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-5: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-2: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0009.tmp\n",
-      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0009.tmp\n",
-      "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0010.tmp\n",
-      "PoolWorker-3: Wrote 1000 images to 
/tmp/madlib_kfSWAjQUxH/cifar_10_test0010.tmp\n",
-      "PoolWorker-3: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
+      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
       "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_KdLWwZ322f/cifar_10_test0011.tmp\n",
+      "PoolWorker-1: Wrote 1000 images to 
/tmp/madlib_l1UVaZIxxr/cifar_10_test0007.tmp\n",
       "PoolWorker-1: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-2: Removed temporary directory PoolWorker-2\n",
-      "PoolWorker-4: Removed temporary directory PoolWorker-4\n",
-      "PoolWorker-5: Removed temporary directory PoolWorker-5\n",
-      "PoolWorker-3: Removed temporary directory PoolWorker-3\n",
-      "PoolWorker-1: Removed temporary directory PoolWorker-1\n",
-      "Done!  Loaded 50000 images in 50.5676851273s\n"
+      "PoolWorker-4: Removed temporary directory /tmp/madlib_1AgOTkwfPn\n",
+      "PoolWorker-3: Removed temporary directory /tmp/madlib_7QOFnBn8jB\n",
+      "PoolWorker-7: Removed temporary directory /tmp/madlib_8Urhpm5TkR\n",
+      "PoolWorker-5: Removed temporary directory /tmp/madlib_RzYo51sdaR\n",
+      "PoolWorker-2: Removed temporary directory /tmp/madlib_AQTX6xmhMK\n"
      ]
-    }
-   ],
-   "source": [
-    "# Save images to temporary directories and load into database\n",
-    "iloader.load_np_array_to_table(data_x, data_y, 'cifar_10_test', 
append=False, img_names=None)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
+    },
     {
-     "ename": "SyntaxError",
-     "evalue": "invalid syntax (<ipython-input-2-c4a66f4c96da>, line 1)",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;36m  File 
\u001b[0;32m\"<ipython-input-2-c4a66f4c96da>\"\u001b[0;36m, line 
\u001b[0;32m1\u001b[0m\n\u001b[0;31m    TODO:  no_temp_files option currently 
has a bug--it looks like it succeeds, but table ens up being 
empty.\u001b[0m\n\u001b[0m        
^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid 
syntax\n"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PoolWorker-1: Removed temporary directory /tmp/madlib_l1UVaZIxxr\n",
+      "PoolWorker-6: Removed temporary directory /tmp/madlib_uip4DCxC9m\n",
+      "PoolWorker-8: Removed temporary directory /tmp/madlib_SzPIAtCqyb\n",
+      "Done!  Loaded 50000 images in 52.8276519775s\n",
+      "8 workers terminated.\n"
      ]
     }
    ],
    "source": [
-    "TODO:  no_temp_files option currently has a bug--it looks like it 
succeeds, but table ens up being empty."
+    "# Save images to temporary directories and load into database\n",
+    "iloader.load_dataset_from_np(data_x, data_y, 'cifar_10_test', 
append=False)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 8,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Appending to table cifar_10_test in madlib db\n",
-      "Spawning 5 workers...\n",
-      "Initializing PoolWorker-6 [pid 279]\n",
-      "Initializing PoolWorker-7 [pid 280]\n",
-      "Initializing PoolWorker-8 [pid 281]\n",
-      "Initializing PoolWorker-9 [pid 284]\n",
-      "PoolWorker-6: Connected to madlib db.\n",
-      "Initializing PoolWorker-10 [pid 285]\n",
-      "PoolWorker-7: Connected to madlib db.\n",
-      "PoolWorker-8: Connected to madlib db.\n",
-      "PoolWorker-9: Connected to madlib db.\n",
-      "PoolWorker-10: Connected to madlib db.\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-9: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-10: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-8: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-7: Loaded 1000 images into cifar_10_test\n",
-      "PoolWorker-6: Loaded 1000 images into cifar_10_test\n",
-      "Done!  Loaded 50000 images in 18.1218080521s\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Same thing, but without writing out any temporary files; everything 
handled in-memory.\n",
-    "#   Should run about twice as fast.\n",
+    "#   Should run about twice as fast, but not working yet.\n",
     "\n",
-    "iloader.ROWS_PER_FILE = 1000  # Try adjusting this downward, if running 
low on memory\n",
-    "iloader.load_np_array_to_table(data_x, data_y, 'cifar_10_test', 
append=True, no_temp_files=True)"
+    "#iloader.ROWS_PER_FILE = 1000  # Try adjusting this downward, if running 
low on memory\n",
+    "#iloader.load_dataset_from_disk(data_x, data_y, 'cifar_10_test2', 
append=True, no_temp_files=True)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -343,93 +276,181 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Executing: CREATE TABLE fashion_mnist_test (id SERIAL, x REAL[], y 
TEXT)\n",
-      "CREATE TABLE\n",
-      "Created table fashion_mnist_test in madlib db\n",
-      "Spawning 5 workers...\n",
-      "Initializing PoolWorker-11 [pid 317]\n",
-      "Initializing PoolWorker-12 [pid 318]\n",
-      "Initializing PoolWorker-13 [pid 319]\n",
-      "Initializing PoolWorker-14 [pid 320]\n",
-      "Initializing PoolWorker-15 [pid 321]\n",
+      "MainProcess: Connected to madlib db.\n",
+      "Appending to table fashion_mnist_test in madlib db\n",
+      "Spawning 8 workers...\n",
+      "Initializing PoolWorker-9 [pid 14871]\n",
+      "PoolWorker-9: Created temporary directory /tmp/madlib_1j4HTsicJ8\n",
+      "Initializing PoolWorker-10 [pid 14872]\n",
+      "PoolWorker-10: Created temporary directory /tmp/madlib_XUO9OeFCRp\n",
+      "Initializing PoolWorker-11 [pid 14873]\n",
+      "PoolWorker-11: Created temporary directory /tmp/madlib_XXoZi8qgE1\n",
+      "Initializing PoolWorker-12 [pid 14874]\n",
+      "PoolWorker-12: Created temporary directory /tmp/madlib_hc0Qt4WpIv\n",
+      "Initializing PoolWorker-13 [pid 14875]\n",
+      "Initializing PoolWorker-14 [pid 14877]\n",
+      "PoolWorker-13: Created temporary directory /tmp/madlib_2JwMfqwlOC\n",
+      "PoolWorker-9: Connected to madlib db.\n",
+      "PoolWorker-14: Created temporary directory /tmp/madlib_0kkBdCmGO4\n",
+      "Initializing PoolWorker-15 [pid 14879]\n",
+      "PoolWorker-10: Connected to madlib db.\n",
+      "Initializing PoolWorker-16 [pid 14880]\n",
+      "PoolWorker-15: Created temporary directory /tmp/madlib_NGqaV8pYyu\n",
+      "PoolWorker-16: Created temporary directory /tmp/madlib_mkO0vnSoJ8\n",
       "PoolWorker-11: Connected to madlib db.\n",
       "PoolWorker-12: Connected to madlib db.\n",
-      "PoolWorker-13: Connected to madlib db.\n",
       "PoolWorker-14: Connected to madlib db.\n",
+      "PoolWorker-13: Connected to madlib db.\n",
       "PoolWorker-15: Connected to madlib db.\n",
+      "PoolWorker-16: Connected to madlib db.\n",
+      "PoolWorker-9: Wrote 1000 images to 
/tmp/madlib_1j4HTsicJ8/fashion_mnist_test0000.tmp\n",
+      "PoolWorker-10: Wrote 1000 images to 
/tmp/madlib_XUO9OeFCRp/fashion_mnist_test0000.tmp\n",
+      "PoolWorker-11: Wrote 1000 images to 
/tmp/madlib_XXoZi8qgE1/fashion_mnist_test0000.tmp\n",
+      "PoolWorker-12: Wrote 1000 images to 
/tmp/madlib_hc0Qt4WpIv/fashion_mnist_test0000.tmp\n",
+      "PoolWorker-14: Wrote 1000 images to 
/tmp/madlib_0kkBdCmGO4/fashion_mnist_test0000.tmp\n",
+      "PoolWorker-13: Wrote 1000 images to 
/tmp/madlib_2JwMfqwlOC/fashion_mnist_test0000.tmp\n",
+      "PoolWorker-15: Wrote 1000 images to 
/tmp/madlib_NGqaV8pYyu/fashion_mnist_test0000.tmp\n",
+      "PoolWorker-16: Wrote 1000 images to 
/tmp/madlib_mkO0vnSoJ8/fashion_mnist_test0000.tmp\n",
+      "PoolWorker-9: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-9: Wrote 1000 images to 
/tmp/madlib_1j4HTsicJ8/fashion_mnist_test0001.tmp\n",
+      "PoolWorker-11: Wrote 1000 images to 
/tmp/madlib_XXoZi8qgE1/fashion_mnist_test0001.tmp\n",
+      "PoolWorker-10: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-12: Wrote 1000 images to 
/tmp/madlib_hc0Qt4WpIv/fashion_mnist_test0001.tmp\n",
       "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-15: Wrote 1000 images to 
/tmp/madlib_NGqaV8pYyu/fashion_mnist_test0001.tmp\n",
       "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-16: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-10: Wrote 1000 images to 
/tmp/madlib_XUO9OeFCRp/fashion_mnist_test0001.tmp\n",
+      "PoolWorker-13: Wrote 1000 images to 
/tmp/madlib_2JwMfqwlOC/fashion_mnist_test0001.tmp\n",
+      "PoolWorker-14: Wrote 1000 images to 
/tmp/madlib_0kkBdCmGO4/fashion_mnist_test0001.tmp\n",
+      "PoolWorker-16: Wrote 1000 images to 
/tmp/madlib_mkO0vnSoJ8/fashion_mnist_test0001.tmp\n",
+      "PoolWorker-9: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-9: Wrote 1000 images to 
/tmp/madlib_1j4HTsicJ8/fashion_mnist_test0002.tmp\n",
+      "PoolWorker-11: Wrote 1000 images to 
/tmp/madlib_XXoZi8qgE1/fashion_mnist_test0002.tmp\n",
+      "PoolWorker-12: Wrote 1000 images to 
/tmp/madlib_hc0Qt4WpIv/fashion_mnist_test0002.tmp\n",
       "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-15: Wrote 1000 images to 
/tmp/madlib_NGqaV8pYyu/fashion_mnist_test0002.tmp\n",
       "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-10: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-16: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-14: Wrote 1000 images to 
/tmp/madlib_0kkBdCmGO4/fashion_mnist_test0002.tmp\n",
+      "PoolWorker-13: Wrote 1000 images to 
/tmp/madlib_2JwMfqwlOC/fashion_mnist_test0002.tmp\n",
+      "PoolWorker-10: Wrote 1000 images to 
/tmp/madlib_XUO9OeFCRp/fashion_mnist_test0002.tmp\n",
+      "PoolWorker-16: Wrote 1000 images to 
/tmp/madlib_mkO0vnSoJ8/fashion_mnist_test0002.tmp\n",
+      "PoolWorker-9: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-10: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-11: Wrote 1000 images to 
/tmp/madlib_XXoZi8qgE1/fashion_mnist_test0003.tmp\n",
+      "PoolWorker-12: Wrote 1000 images to 
/tmp/madlib_hc0Qt4WpIv/fashion_mnist_test0003.tmp\n",
+      "PoolWorker-9: Wrote 1000 images to 
/tmp/madlib_1j4HTsicJ8/fashion_mnist_test0003.tmp\n",
+      "PoolWorker-13: Wrote 1000 images to 
/tmp/madlib_2JwMfqwlOC/fashion_mnist_test0003.tmp\n",
+      "PoolWorker-15: Wrote 1000 images to 
/tmp/madlib_NGqaV8pYyu/fashion_mnist_test0003.tmp\n",
       "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-16: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-10: Wrote 1000 images to 
/tmp/madlib_XUO9OeFCRp/fashion_mnist_test0003.tmp\n",
+      "PoolWorker-16: Wrote 1000 images to 
/tmp/madlib_mkO0vnSoJ8/fashion_mnist_test0003.tmp\n",
+      "PoolWorker-14: Wrote 1000 images to 
/tmp/madlib_0kkBdCmGO4/fashion_mnist_test0003.tmp\n",
+      "PoolWorker-9: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-10: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-9: Wrote 1000 images to 
/tmp/madlib_1j4HTsicJ8/fashion_mnist_test0004.tmp\n",
       "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-11: Wrote 1000 images to 
/tmp/madlib_XXoZi8qgE1/fashion_mnist_test0004.tmp\n",
+      "PoolWorker-12: Wrote 1000 images to 
/tmp/madlib_hc0Qt4WpIv/fashion_mnist_test0004.tmp\n",
+      "PoolWorker-15: Wrote 1000 images to 
/tmp/madlib_NGqaV8pYyu/fashion_mnist_test0004.tmp\n",
       "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-16: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-10: Wrote 1000 images to 
/tmp/madlib_XUO9OeFCRp/fashion_mnist_test0004.tmp\n",
+      "PoolWorker-14: Wrote 1000 images to 
/tmp/madlib_0kkBdCmGO4/fashion_mnist_test0004.tmp\n",
+      "PoolWorker-13: Wrote 1000 images to 
/tmp/madlib_2JwMfqwlOC/fashion_mnist_test0004.tmp\n",
+      "PoolWorker-16: Wrote 1000 images to 
/tmp/madlib_mkO0vnSoJ8/fashion_mnist_test0004.tmp\n",
+      "PoolWorker-9: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-9: Wrote 1000 images to 
/tmp/madlib_1j4HTsicJ8/fashion_mnist_test0005.tmp\n",
+      "PoolWorker-10: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-11: Wrote 1000 images to 
/tmp/madlib_XXoZi8qgE1/fashion_mnist_test0005.tmp\n",
       "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-14: Wrote 1000 images to 
/tmp/madlib_0kkBdCmGO4/fashion_mnist_test0005.tmp\n",
+      "PoolWorker-12: Wrote 1000 images to 
/tmp/madlib_hc0Qt4WpIv/fashion_mnist_test0005.tmp\n",
+      "PoolWorker-13: Wrote 1000 images to 
/tmp/madlib_2JwMfqwlOC/fashion_mnist_test0005.tmp\n",
+      "PoolWorker-10: Wrote 1000 images to 
/tmp/madlib_XUO9OeFCRp/fashion_mnist_test0005.tmp\n",
+      "PoolWorker-15: Wrote 1000 images to 
/tmp/madlib_NGqaV8pYyu/fashion_mnist_test0005.tmp\n",
+      "PoolWorker-16: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-16: Wrote 1000 images to 
/tmp/madlib_mkO0vnSoJ8/fashion_mnist_test0005.tmp\n",
+      "PoolWorker-10: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-9: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-16: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
-      "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-13: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-12: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-10: Wrote 1000 images to 
/tmp/madlib_XUO9OeFCRp/fashion_mnist_test0006.tmp\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PoolWorker-15: Wrote 1000 images to 
/tmp/madlib_NGqaV8pYyu/fashion_mnist_test0006.tmp\n",
+      "PoolWorker-9: Wrote 1000 images to 
/tmp/madlib_1j4HTsicJ8/fashion_mnist_test0006.tmp\n",
+      "PoolWorker-16: Wrote 1000 images to 
/tmp/madlib_mkO0vnSoJ8/fashion_mnist_test0006.tmp\n",
+      "PoolWorker-11: Wrote 1000 images to 
/tmp/madlib_XXoZi8qgE1/fashion_mnist_test0006.tmp\n",
+      "PoolWorker-14: Wrote 1000 images to 
/tmp/madlib_0kkBdCmGO4/fashion_mnist_test0006.tmp\n",
+      "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-10: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-16: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-9: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-10: Wrote 1000 images to 
/tmp/madlib_XUO9OeFCRp/fashion_mnist_test0007.tmp\n",
+      "PoolWorker-15: Wrote 1000 images to 
/tmp/madlib_NGqaV8pYyu/fashion_mnist_test0007.tmp\n",
+      "PoolWorker-14: Wrote 1000 images to 
/tmp/madlib_0kkBdCmGO4/fashion_mnist_test0007.tmp\n",
+      "PoolWorker-16: Wrote 1000 images to 
/tmp/madlib_mkO0vnSoJ8/fashion_mnist_test0007.tmp\n",
+      "PoolWorker-11: Wrote 1000 images to 
/tmp/madlib_XXoZi8qgE1/fashion_mnist_test0007.tmp\n",
+      "PoolWorker-9: Wrote 1000 images to 
/tmp/madlib_1j4HTsicJ8/fashion_mnist_test0007.tmp\n",
       "PoolWorker-14: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-16: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-10: Loaded 1000 images into fashion_mnist_test\n",
       "PoolWorker-15: Loaded 1000 images into fashion_mnist_test\n",
-      "Done!  Loaded 60000 images in 11.783424139s\n"
+      "PoolWorker-11: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-9: Loaded 1000 images into fashion_mnist_test\n",
+      "PoolWorker-12: Removed temporary directory /tmp/madlib_hc0Qt4WpIv\n",
+      "PoolWorker-13: Removed temporary directory /tmp/madlib_2JwMfqwlOC\n",
+      "PoolWorker-14: Removed temporary directory /tmp/madlib_0kkBdCmGO4\n",
+      "PoolWorker-15: Removed temporary directory /tmp/madlib_NGqaV8pYyu\n",
+      "PoolWorker-9: Removed temporary directory /tmp/madlib_1j4HTsicJ8\n",
+      "PoolWorker-16: Removed temporary directory /tmp/madlib_mkO0vnSoJ8\n",
+      "PoolWorker-10: Removed temporary directory /tmp/madlib_XUO9OeFCRp\n",
+      "PoolWorker-11: Removed temporary directory /tmp/madlib_XXoZi8qgE1\n",
+      "Done!  Loaded 60000 images in 18.9483509064s\n",
+      "8 workers terminated.\n"
      ]
     }
    ],
    "source": [
-    "iloader.load_np_array_to_table(data_x, data_y, 'fashion_mnist_test', 
append=False, no_temp_files=True)"
+    "iloader.load_dataset_from_np(data_x, data_y, 'fashion_mnist_test', 
append=True)"
    ]
   },
   {
@@ -437,7 +458,10 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "# Load a dataset directly from disk\n",
+    "iloader.load_dataset_from_disk('/tmp/image_test/cifar10', 
'fromdisk_test')"
+   ]
   }
  ],
  "metadata": {
diff --git a/community-artifacts/Deep-learning/madlib_image_loader.py 
b/community-artifacts/Deep-learning/madlib_image_loader.py
old mode 100644
new mode 100755
index b999036..09a170d
--- a/community-artifacts/Deep-learning/madlib_image_loader.py
+++ b/community-artifacts/Deep-learning/madlib_image_loader.py
@@ -1,19 +1,48 @@
+#!/usr/bin/env python
 #
-# Python module to load images into postgres or greenplum db, for 
+# Python module to load images into postgres or greenplum db, for
 #  use with madlib deep_learning module.
 #
-# The format of the image tables created will have at least 3 rows:
+# The format of the image tables created will have at least 3 columns:
 #     (id SERIAL, x REAL[], y).  Each row is 1 image,
 #     with image data represented by x (a 3D array of type "real"), and
 #     y (category) as text.  id is just a unique identifier for each image,
-#     so they don't get mixed up during prediction.
+#     so they don't get mixed up during prediction.  If images are being
+#     loaded from disk, there will be an additional img_name column containing
+#     the filename of the image, to help identify later.
 #
 #   ImageLoader.ROWS_PER_FILE = 1000 by default; this is the number of rows per
 #      temporary file (or StringIO buffer) loaded at once.
 #
 
-#   User API is through ImageLoader and DbCredentials class constructors,
-#     and ImageLoader.load_np_array_to_table
+# There are two ways of using this module.  One is to load it with:
+#  import madlib_image_loader
+#     (make sure it is in a directory python knows about.
+#      Try adding the directory to PYTHONPATH if it can't find it.)
+#
+# and use the exposed classes and functions described below.
+#
+# The second way is to run it directly, passing all options on the
+#  command line.  The second way only supports loading images
+#  from disk, whereas the first way can be used either to do that or
+#  to load them from a dataset already in an existing numpy array (such
+#  as the datasets that come prepackaged with keras).
+#
+#   The module API is pretty simple, only involving two classes:
+#     ImageLoader
+#     DbCredentials
+#
+#     two functions (in addition to the class constructors):
+#
+#   ImageLoader.load_dataset_from_np
+#   ImageLoader.load_dataset_from_disk
+#
+#     and one adjustable parameter (change if default is not working well):
+#
+#   ImageLoader.ROWS_PER_FILE=1000
+#
+#
+#   Workflow
 #
 #     1. Create objects:
 #
@@ -22,53 +51,108 @@
 #
 #           iloader = ImageLoader(db_creds, num_workers, table_name=None)
 #
-#     2. Perform parallel image loading:
+#     2a. Perform parallel image loading from numpy arrays:
+#
+#           iloader.load_dataset_from_np(data_x, data_y, table_name,
+#                                        append=False, no_temp_files=False)
+#
+#       data_x contains image data in np.array format, and data_y is a 1D 
np.array
+#           of the image categories (labels).
+#
+#       Default database credentials are: localhost port 5432, madlib db, no
+#           password.  Calling the default constructor DbCredentials() will 
attempt
+#           to connect using these credentials, but any of them can be 
overridden.
+#
+#       append=False attempts to create a new table, while append=True appends 
more
+#           images to an existing table.
+#
+#       If the user passes a table_name while creating ImageLoader object, it 
will
+#           be used for all further calls to load_dataset_from_np.  It can be
+#           changed by passing it as a parameter during the actual call to
+#           load_dataset_from_np, and if so future calls will load to that 
table
+#           name instead.  This avoids needing to pass the table_name again 
every
+#           time, but also allows it to be changed at any time.
+#
+#       EXPERIMENTAL:  If no_temp_files=True, the operation will happen without
+#                      writing out the tables to temporary files before 
loading them.
+#                      Instead, an in-memory filelike buffer (StringIO) will 
be used
+#                      to build the tables before loading.  Currently not 
working,
+#                      for unknown reason.
+#
+#  or,
 #
-#           iloader.load_np_array_to_table(data_x, data_y, table_name,
-#                                          append=False, img_names=None,
-#                                          no_temp_files=False)
+#     2b. Perform parallel image loading from disk:
 #
-#   data_x contains image data in np.array format, and data_y is a 1D np.array
-#       of the image categories (labels).
+#           load_dataset_from_disk(self, root_dir, table_name, 
num_labels='all',
+#               append=False, no_temp_files=False):
 #
-#   Default database credentials are: localhost port 5432, madlib db, no
-#       password.  Calling the default constructor DbCredentials() will attempt
-#       to connect using these credentials, but any of them can be overriden.
+#       Calling this function instead will look in root_dir on the local disk 
of
+#           wherever this is being run.  It will skip over any files in that
+#           directory, but will load images contained in each of its
+#           subdirectories.  The images should be organized by category/class,
+#           where the name of each subdirectory is the label for the images
+#           contained within it.
 #
-#   append=False attempts to create a new table, while append=True appends 
more 
-#       images to an existing table.
+#       The table_name, append, and no_temp_files parameters are the same as
+#           above.  num_labels is an optional parameter which can be used to
+#           restrict the number of labels (image classes) loaded, even if more
+#           are found in root_dir.  For example, for a large dataset you may
+#           have hundreds of labels, but only wish to use a subset of that
+#           containing a few dozen.
 #
-#   If the user passes a table_name while creating ImageLoader object, it will
-#       be used for all further calls to load_np_array_to_table.  It can be
-#       changed by passing it as a parameter during the actual call to
-#       load_np_array_to_table, and if so future calls will load to that table
-#       name instead.  This avoids needing to pass the table_name again every
-#       time, but also allows it to be changed at any time.
 #
-#   EXPERIMENTAL:  If no_temp_files=True, the operation will happen without
-#                  writing out the tables to temporary files before loading 
them.
-#                  Instead, an in-memory filelike buffer (StringIO) will be 
used
-#                  to build the tables before loading.
+# If you want to load an image dataset from disk, but don't feel like writing
+#  any python code to call the API, you can just run this file directly, 
passing
+#  these parameters on the command line.
+#
+# usage: madlib_image_loader.py [-h] [-r ROOT_DIR] [-n NUM_LABELS] [-d DB_NAME]
+#                               [-a] [-w NUM_WORKERS] [-p PORT] [-U USERNAME]
+#                               [-t HOST] [-P PASSWORD] [-m]
+#                               table_name
+#
+# positional arguments:
+#   table_name            Name of table where images should be loaded
+#
+# optional arguments:
+#   -h, --help            show this help message and exit
+#   -r ROOT_DIR, --root-dir ROOT_DIR
+#                         Root directory of image directories (default: .)
+#   -n NUM_LABELS, --num-labels NUM_LABELS
+#                         Number of image labels (categories) to load. 
(default:
+#                         all)
+#   -d DB_NAME, --db-name DB_NAME
+#                         Name of database where images should be loaded
+#                         (default: madlib)
+#   -a, --append          Append images to an existing table
+#                         (default: False)
+#   -w NUM_WORKERS, --num-workers NUM_WORKERS
+#                         Number of parallel workers. (default: 5)
+#   -p PORT, --port PORT  database server port (default: 5432)
+#   -U USERNAME, --username USERNAME
+#                         database user name (default: None)
+#   -t HOST, --host HOST  database server host. (default: localhost)
+#   -P PASSWORD, --password PASSWORD
+#                         database user password (default: None)
+#   -m, --no-temp-files   no temporary files, construct all image tables in-
+#                         memory (default: False)
 #
-#   img_names:  this is currently unused, but we plan to use it when we add
-#               support for loading images from disk.
 
-import numpy as np
-import sys
+import argparse
+from cStringIO import StringIO
+from multiprocessing import Pool, current_process
 import os
-import re
-import gc
 import random
-import string
-import psycopg2 as db
-from multiprocessing import Pool, current_process
+import signal
 from shutil import rmtree
+import string
 import time
-import signal
 import traceback
-from cStringIO import StringIO
 
-class SignalException (Exception):
+import psycopg2 as db
+import numpy as np
+from PIL import Image
+
+class SignalException(Exception):
     pass
 
 def _worker_sig_handler(signum, frame):
@@ -76,7 +160,7 @@ def _worker_sig_handler(signum, frame):
         msg = "Received SIGINT in worker."
     elif signum == signal.SIGTERM:
         msg = "Received SIGTERM in worker."
-        _worker_cleanup()
+        _worker_cleanup(None)
     elif signum == signal.SIGSEGV:
         msg = "Received SIGSEGV in worker."
         traceback.print_stack(frame)
@@ -85,8 +169,12 @@ def _worker_sig_handler(signum, frame):
 
     raise SignalException(msg)
 
-def _call_worker(data):
-    try:
+def _call_disk_worker(label):
+    global iloader
+    iloader.call_disk_worker(label)
+
+def _call_np_worker(data): # data = list of (x, y) or (x, y, num_images) tuples
+    try:                   #        of length self.ROWS_PER_FILE
         if iloader.no_temp_files:
             iloader._just_load(data)
         else:
@@ -102,12 +190,14 @@ def _call_worker(data):
         print traceback.format_exc()
         raise e
 
+# dummy param needed so this can be called for
+# each worker from Pool.map()
 def _worker_cleanup(dummy):
-    # Called when worker process is terminated
     if iloader.tmp_dir:
         iloader.rm_temp_dir()
 
-def init_worker(mother_pid, table_name, append, no_temp_files, db_creds):
+def init_worker(mother_pid, table_name, append, no_temp_files, db_creds,
+                from_disk, root_dir=None):
     pr = current_process()
     print("Initializing {0} [pid {1}]".format(pr.name, pr.pid))
 
@@ -116,7 +206,8 @@ def init_worker(mother_pid, table_name, append, 
no_temp_files, db_creds):
         iloader.mother_pid = mother_pid
         iloader.table_name = table_name
         iloader.no_temp_files = no_temp_files
-        iloader.img_names = None
+        iloader.root_dir = root_dir
+        iloader.from_disk = from_disk
         signal.signal(signal.SIGINT, _worker_sig_handler)
         signal.signal(signal.SIGSEGV, _worker_sig_handler)
         if not no_temp_files:
@@ -154,10 +245,21 @@ class ImageLoader:
         self.mother = False
         self.pr_name = current_process().name
         self.table_name = table_name
+        self.root_dir = None
+        self.pool = None
+        self.no_temp_files = None
 
         global iloader  # Singleton per process
         iloader = self
 
+    def terminate_workers(self):
+        if iloader.pool:
+            iloader.pool.map(_worker_cleanup, [0] * self.num_workers)
+
+        self.pool.terminate()
+        self.pool = None
+        print("{} workers terminated.".format(self.num_workers))
+
     def _random_string(self):
         return ''.join([random.choice(string.ascii_letters + string.digits)\
             for n in xrange(10)])
@@ -165,14 +267,14 @@ class ImageLoader:
     def mk_temp_dir(self):
         self.tmp_dir = '/tmp/madlib_{0}'.format(self._random_string())
         os.mkdir(self.tmp_dir)
-        print("{0}: Created temporary directory {0}"\
+        print("{0}: Created temporary directory {1}"\
             .format(self.pr_name, self.tmp_dir))
 
     def rm_temp_dir(self):
         rmtree(self.tmp_dir)
-        self.tmp_dir = None
-        print("{0}: Removed temporary directory {0}"\
+        print("{0}: Removed temporary directory {1}"\
             .format(self.pr_name, self.tmp_dir))
+        self.tmp_dir = None
 
     def db_connect(self):
         if self.db_cur:
@@ -218,29 +320,35 @@ class ImageLoader:
             self.db_conn.close()
             self.db_conn = None
 
-    def _gen_lines(self, data, img_names=None):
+    def _gen_lines(self, data):
+        def f(x):
+            x = str(x.tolist())
+            return x.replace('[','{').replace(']','}')
+
         for i, row in enumerate(data):
-            x, y = row
-            line = str(x.tolist())
-            line = line.replace('[','{').replace(']','}')
-            if img_names:
-                line = '{0}|{1}|{2}\n'.format(line, y, img_names[i])
+            if len(row) == 3:
+                x, y, image_name = row
+                yield '{0}|{1}|{2}\n'.format(f(x), y, image_name)
+            elif len(row) == 2:
+                x, y = row
+                yield '{0}|{1}\n'.format(f(x), y)
             else:
-                line = '{0}|{1}\n'.format(line, y)
-            yield line
+                raise RuntimeError("Cannot write invalid row to table:\n{0}"\
+                    .format(row))
 
-    def _write_file(self, file_object, data, img_names=None):
-        lines = self._gen_lines(data, img_names)
+    def _write_file(self, file_object, data):
+        lines = self._gen_lines(data)
         file_object.writelines(lines)
 
+    # This is default value, can be overriden by user, by setting
+    #   iloader.ROWS_PER_FILE after ImageLoader is created.
     ROWS_PER_FILE = 1000
 
     # Copies from open file-like object f into database
     def _copy_into_db(self, f, data):
         table_name = self.table_name
-        img_names = self.img_names
 
-        if img_names:
+        if self.from_disk:
             self.db_cur.copy_from(f, table_name, sep='|', columns=['x','y',
                                                                    'img_name'])
         else:
@@ -280,39 +388,7 @@ class ImageLoader:
         with file(filename, 'r') as f:
             self._copy_into_db(f, data)
 
-    def load_np_array_to_table(self, data_x, data_y, table_name=None,
-                               append=False, img_names=None,
-                               no_temp_files=False):
-        """
-        Loads a numpy array into db.  For append=False, creates a new table and
-            loads the data.  For append=True, appends data to existing table.
-            Throws an exception if append=False and table_name already exists,
-            or if append=True and table_name does not exist.  Makes use of
-            worker processes initialized during ImageLoader object creation to
-            load in parallel.
-        @data_x independent variable data, a numpy array of images.  Size of
-            first dimension is number of images.  Rest of dimensions determined
-            by image resolution and number of channels.
-        @data_y dependent variable data (image classes), as an numpy array
-        @table_name Name of table in db to load data into
-        @append Whether to create a new table (False) or append to an existing
-            one (True).  If unspecified, default is False @img_names If not None,
-            a list of the image names corresponding to elements of the data_x
-            numpy array.  If present, this is included as a column in the table.
-        @no_temp_files If specified, no temporary files are written--all
-            operations are performed in-memory.
-
-        """
-        start_time = time.time()
-        self.mother = True
-        self.append = append
-        if table_name:
-            self.table_name = table_name
-
-        if not self.table_name:
-            raise ValueError("Must specify table_name either in ImageLoader"
-                " constructor or in load_np_array_to_table params!")
-
+    def _validate_input_and_create_table(self, data_x=[], data_y=[]):
         if len(data_x) != len(data_y):
             raise ValueError("Invalid dataset passed, number of labels in "
                              "data_y ({0}) does not match number of images "
@@ -339,35 +415,71 @@ class ImageLoader:
         else:
             # Create new table
             try:
-                if img_names:
+                if self.from_disk:
                     sql = "CREATE TABLE {0} (id SERIAL, x REAL[], y TEXT,\
                         img_name TEXT)".format(self.table_name)
                 else:
                     sql = "CREATE TABLE {0} (id SERIAL, x REAL[], y TEXT)"\
                         .format( self.table_name)
                 self.db_exec(sql)
-            except(Exception, db.DatabaseError):
-                raise RuntimeError("Table {0} already exists in {1} db.  Use "
+            except db.DatabaseError as e:
+                raise RuntimeError("{0} while creating {1} in db {2}.\n"
+                                   "If the table already exists, you can use "
                                    "append=True to append more images to it."
-                    .format(self.table_name, self.db_creds.db_name))
+                                .format(e.message.strip(), self.table_name,
+                                        self.db_creds.db_name))
 
             print "Created table {0} in {1} db".format(self.table_name,
                 self.db_creds.db_name)
 
         self.db_close()
 
+    def load_dataset_from_np(self, data_x, data_y, table_name=None,
+                             append=False, no_temp_files=False):
+        """
+        Loads a numpy array into db.  For append=False, creates a new table and
+            loads the data.  For append=True, appends data to existing table.
+            Throws an exception if append=False and table_name already exists,
+            or if append=True and table_name does not exist.  Makes use of
+            worker processes initialized during ImageLoader object creation to
+            load in parallel.
+        @data_x independent variable data, a numpy array of images.  Size of
+            first dimension is number of images.  Rest of dimensions determined
+            by image resolution and number of channels.
+        @data_y dependent variable data (image classes), as a numpy array
+        @table_name Name of table in db to load data into
+        @append Whether to create a new table (False) or append to an existing
+            one (True).  If unspecified, default is False
+        @no_temp_files If specified, no temporary files are written--all
+            operations are performed in-memory.
+
+        """
+        start_time = time.time()
+        self.mother = True
+        self.from_disk = False
+        self.append = append
+        if table_name:
+            self.table_name = table_name
+
+        if not self.table_name:
+            raise ValueError("Must specify table_name either in ImageLoader"
+                " constructor or in load_dataset_from_np params!")
+
+        self._validate_input_and_create_table(data_x, data_y)
+
         data_y = data_y.flatten()
         data = zip(data_x, data_y)
 
-        print("Spawning {0} workers...".format(self.num_workers))
-
-        p = Pool(processes=self.num_workers,
-                 initializer=init_worker,
-                 initargs=(current_process().pid,
-                           self.table_name,
-                           self.append,
-                           no_temp_files,
-                           self.db_creds))
+        if not self.pool:
+            print("Spawning {0} workers...".format(self.num_workers))
+            self.pool = Pool(processes=self.num_workers,
+                     initializer=init_worker,
+                     initargs=(current_process().pid,
+                               self.table_name,
+                               self.append,
+                               no_temp_files,
+                               self.db_creds,
+                               False))
 
 
         datas = []
@@ -383,36 +495,182 @@ class ImageLoader:
         #  ( inside x can also be a numpy tensor with several dimensions, but y
         #    should just be a single scalar )
         #
-        #  multiprocessing library will call _call_worker() in some worker for
+        #  multiprocessing library will call _call_np_worker() in some worker for
         #   each file, splitting the list of files up into roughly equal chunks
         #   for each worker to handle.  For example, if there are 500 files and
-        #   5 workers, each will handle about 100 files, and _call_worker() will
-        #   be called 100 times, each time with a different file full of images.
-        #
+        #   5 workers, each will handle about 100 files, and _call_np_worker()
+        #   will be called 100 times, each time with a different file full
+        #   of images.
 
         try:
-            p.map(_call_worker, datas)
+            self.pool.map(_call_np_worker, datas)
         except(Exception) as e:
-            p.map(_worker_cleanup, [0] * self.num_workers)
-            p.terminate()
+            self.terminate_workers()
             raise e
 
-        p.map(_worker_cleanup, [0] * self.num_workers)
         end_time = time.time()
         print("Done!  Loaded {0} images in {1}s"\
             .format(len(data), end_time - start_time))
-        p.terminate()
-
-# Uncommenting the code below can be useful for testing, but will be removed
-#  once we add a main() function intended to be called by a user who wants to
-#  load images from disk.
-#
-# def test_loading_nparray(data_x, data_y):
-#     db_creds = DbCredentials(port=5094)
-#     iloader = ImageLoader(num_workers=5, table_name='cifar_10_test',
-#                           db_creds=db_creds)
-#     iloader.load_np_array_to_table(data_x, data_y, append=True)
-# 
-# if __name__ == '__main__':
-#     train_data, _ = cifar10.load_data()
-#     test_loading_nparray(*train_data)
+
+        self.terminate_workers()
+
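A minimal end-to-end sketch of the numpy path described in the docstring above, assuming madlib_image_loader.py is importable and a database named madlib is reachable with the same defaults used by main() below; CIFAR-10 via keras is only an example dataset:

    from keras.datasets import cifar10
    from madlib_image_loader import ImageLoader, DbCredentials

    (x_train, y_train), _ = cifar10.load_data()
    db_creds = DbCredentials('madlib', None, None, 'localhost', 5432)
    iloader = ImageLoader(num_workers=5, db_creds=db_creds)
    iloader.load_dataset_from_np(x_train, y_train, table_name='cifar_10_train',
                                 append=False)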
+    def call_disk_worker(self, label):
+        dir_name = os.path.join(self.root_dir,label)
+
+        filenames = os.listdir(dir_name)
+        data = []
+        first_image = Image.open(os.path.join(self.root_dir, label, filenames[0]))
+        for index, filename in enumerate(filenames):
+            image = Image.open(os.path.join(self.root_dir, label, filename))
+            x = np.array(image)
+            if x.shape != np.array(first_image).shape:
+                raise Exception("Images {0} and {1} in label {2} have different "
+                                "shapes {0}:{3} {1}:{4}.  Make sure that all the "
+                                "images are of the same shape."\
+                    .format(filenames[0], filename, label,
+                            np.array(first_image).shape, x.shape))
+
+            data.append((x, label, filename))
+            if (index % self.ROWS_PER_FILE) == (self.ROWS_PER_FILE - 1):
+                _call_np_worker(data)
+                data = []
+
+        if len(data) > 0:
+            _call_np_worker(data)
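To make the batching above concrete: with the default ROWS_PER_FILE of 1000, a label directory holding 2,500 images is flushed to the database in three batches of 1000, 1000, and 500 rows. The same chunking in isolation (file names are made up):

    ROWS_PER_FILE = 1000
    filenames = ['img_{0}.jpg'.format(i) for i in range(2500)]
    batches = [filenames[i:i + ROWS_PER_FILE]
               for i in range(0, len(filenames), ROWS_PER_FILE)]
    print([len(b) for b in batches])   # -> [1000, 1000, 500]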
+
+    def load_dataset_from_disk(self, root_dir, table_name, num_labels='all',
+                               append=False, no_temp_files=False):
+        """
+        Load images from disk into a greenplum database table. All the images
+            should be of the same shape.
+        @root_dir: Location of the dir which contains all the labels and their
+            associated images. Can be relative or absolute. Each label needs to
+            have its own dir and should contain only images inside its own dir.
+            (Extra files in root dir will be ignored, only directories matter.)
+        @table_name: Name of destination table in db
+        @num_labels: Number of labels to process/load into the table. By default
+            all the labels are loaded.
+        @append: If set to true, do not create a new table but append to an
+            existing table.
+        @no_temp_files: EXPERIMENTAL.  Handle table creation in-memory, don't
+            write any temp files. (Not working in current testing; cause unknown.)
+        """
+        start_time = time.time()
+        self.mother = True
+        self.append = append
+        self.no_temp_files = no_temp_files
+        self.table_name = table_name
+        self.from_disk = True
+        self._validate_input_and_create_table()
+
+        self.root_dir = root_dir
+        subdirs = os.listdir(root_dir)
+
+        labels = []
+        # Prune files from directory listing, only use actual sub-directories
+        #  This allows the user to keep a tar.gz file or other extraneous files
+        #  in the root directory without causing any problems.
+        for subdir in subdirs:
+            if os.path.isdir(os.path.join(root_dir,subdir)):
+                labels.append(subdir)
+            else:
+                print("{0} is not a directory, skipping".format(subdir))
+
+        if num_labels == 'all':
+            print('number of labels = {}'.format(len(labels)))
+            num_labels = len(labels)
+            print "Found {0} image labels in {1}".format(num_labels, root_dir)
+        else:
+            num_labels = int(num_labels)
+            labels = labels[:num_labels]
+            print "Using first {0} image labels in {1}".format(num_labels,
+                                                               root_dir)
+
+        if not self.pool:
+            print("Spawning {0} workers...".format(self.num_workers))
+            self.pool = Pool(processes=self.num_workers,
+                             initializer=init_worker,
+                             initargs=(current_process().pid,
+                                       self.table_name,
+                                       self.append,
+                                       self.no_temp_files,
+                                       self.db_creds,
+                                       self.from_disk,
+                                       root_dir))
+        try:
+            self.pool.map(_call_disk_worker, labels)
+        except(Exception) as e:
+            self.terminate_workers()
+            raise e
+
+        self.pool.map(_worker_cleanup, [0] * self.num_workers)
+
+        end_time = time.time()
+        print("Done!  Loaded {0} image categories in {1}s"\
+            .format(len(labels), end_time - start_time))
+
+        self.terminate_workers()
+
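The on-disk layout load_dataset_from_disk expects, and a minimal module-level call (directory names become the y labels; paths, table name, and credentials below are illustrative):

    #   /tmp/pets/
    #       cat/   cat_0001.jpg  cat_0002.jpg ...
    #       dog/   dog_0001.jpg  dog_0002.jpg ...
    from madlib_image_loader import ImageLoader, DbCredentials

    db_creds = DbCredentials('madlib', None, None, 'localhost', 5432)
    iloader = ImageLoader(num_workers=5, db_creds=db_creds)
    iloader.load_dataset_from_disk('/tmp/pets', 'pet_images', num_labels='all',
                                   append=False)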
+def main():
+    parser = argparse.ArgumentParser(description='Madlib Image Loader',
+                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument('-r', '--root-dir', action='store',
+                        dest='root_dir', default='.',
+                        help='Root directory of image directories')
+
+    parser.add_argument('-n', '--num-labels', action='store',
+                        dest='num_labels', default='all',
+                        help='Number of image labels (categories) to load.')
+
+    parser.add_argument('-d', '--db-name', action='store',
+                        dest='db_name', default='madlib',
+                        help='Name of database where images should be loaded')
+
+    parser.add_argument('-a', '--append', action='store_true',
+                        dest='append', default=False,
+                        help='Append to an existing table instead of creating a new one')
+
+    parser.add_argument('-w', '--num-workers', action='store',
+                        dest='num_workers', default=5,
+                        help='Number of parallel workers.')
+
+    parser.add_argument('-p', '--port', action='store',
+                        dest='port', default=5432,
+                        help='database server port')
+
+    parser.add_argument('-U', '--username', action='store',
+                        dest='username', default=None,
+                        help='database user name')
+
+    parser.add_argument('-t', '--host', action='store',
+                        dest='host', default='localhost',
+                        help='database server host.')
+
+    parser.add_argument('-P', '--password', action='store',
+                        dest='password', default=None,
+                        help='database user password')
+
+    parser.add_argument('-m', '--no-temp-files', action='store_true',
+                        dest='no_temp_files', default=False,
+                        help="no temporary files, construct all image tables "
+                             "in-memory")
+
+    parser.add_argument('table_name',
+                        help='Name of table where images should be loaded')
+
+    args = parser.parse_args()
+
+    db_creds = DbCredentials(args.db_name, args.username, args.password,
+                             args.host, args.port)
+
+    iloader = ImageLoader(db_creds, int(args.num_workers))
+
+    iloader.load_dataset_from_disk(args.root_dir,
+                                   args.table_name,
+                                   args.num_labels,
+                                   args.append,
+                                   args.no_temp_files)
+
+if __name__ == '__main__':
+    main()
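The same disk load, run from the command line (host, user, paths, and table name are illustrative; argparse also provides --help for the full option list):

    python madlib_image_loader.py -r /tmp/pets -d madlib -U gpadmin -t localhost \
        -p 5432 -w 8 pet_images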
