[GitHub] [incubator-mxnet] nswamy commented on a change in pull request #14503: API to create RecordIO files

GitBox Wed, 03 Apr 2019 15:06:19 -0700

nswamy commented on a change in pull request #14503: API to create RecordIO files
URL: https://github.com/apache/incubator-mxnet/pull/14503#discussion_r271950830


 ##########
 File path: python/mxnet/io/_im2rec.py
 ##########
 @@ -0,0 +1,218 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""convert image dataset to recordio."""
+from collections import deque
+
+import os
+import logging
+import itertools
+import multiprocessing as mp
+from multiprocessing.managers import SyncManager
+from functools import partial
+
+from ..ndarray import array
+from .. import recordio
+
+try:
+    import Queue as queue
+except ImportError:
+    import queue as queue
+try:
+    import cv2
+except ImportError:
+    cv2 = None
+
+class SharedObjectManager(SyncManager):
+    """
+    SyncManager subclass that carries the shared types registered below.
+    """
+
+
+# register queue.PriorityQueue on the manager class under the type id "PriorityQueue"
+SharedObjectManager.register("PriorityQueue", queue.PriorityQueue)
+
+def _read_list(list_file, batch_size):
+    """
+    Read a .lst file lazily and yield its parsed entries in batches.
+
+    Every line is split on tabs: the first field is parsed as an int,
+    the last field is kept as a string, and all fields in between are
+    parsed as floats.
+
+    Parameters
+    ----------
+    list_file : str
+        path of the input .lst file.
+    batch_size : int
+        maximum number of lines consumed from the file per yielded batch.
+
+    Yields
+    ------
+    list of list
+        one batch; each item is ``[first_field, last_field, float_0, ...]``.
+        Malformed lines are logged and skipped, so a batch may hold
+        fewer than ``batch_size`` items (possibly none).
+    """
+    with open(list_file, 'r') as input_file:
+        while True:
+            fetch_data = list(itertools.islice(input_file, batch_size))
+            if not fetch_data:
+                break
+            batch = []
+            for raw_line in fetch_data:
+                fields = [field.strip() for field in raw_line.strip().split('\t')]
+                # check the data format of .lst file
+                if len(fields) < 3:
+                    logging.info("lst should have at least three parts, "
+                                 "but only has %s parts for %s", len(fields), fields)
+                    continue
+                try:
+                    values = [float(value) for value in fields[1:-1]]
+                    item = [int(fields[0]), fields[-1]] + values
+                except ValueError:
+                    # int()/float() signal non-numeric text with ValueError,
+                    # so that is what must be caught to skip a bad line
+                    logging.info('Parsing lst met error for %s', fields)
+                    continue
+                batch.append(item)
+            yield batch
+
+def _read_worker(q_out, transformer, color, quality, encoding, pass_through, data_record):
+    """
+    Pack a single image record and put it on the output queue.
+
+    With ``pass_through`` set, the file bytes are packed as they are.
+    Otherwise the image is read with cv2, passed through ``transformer``
+    and packed with ``recordio.pack_img``.
+
+    Parameters
+    ----------
+    q_out : priority queue
+        queue receiving ``(i, packed_image, item)`` tuples
+    transformer : transformer object
+        callable applied to the image after it is wrapped with ``array``
+    color : int
+        flag handed to ``cv2.imread``
+    quality : int
+        ``quality`` argument of ``recordio.pack_img``
+    encoding : str
+        ``img_fmt`` argument of ``recordio.pack_img``
+    pass_through: bool
+        skip encoding while packing the image
+    data_record : tuple
+        ``(i, item)`` pair; ``item[1]`` is the image path, ``item[0]``
+        and ``item[2:]`` go into the record header.
+    """
+    idx, entry = data_record
+    img_path = entry[1]
+    try:
+        # construct the header of the record
+        header = recordio.IRHeader(0, entry[2:], entry[0], 0)
+        if not pass_through:
+            decoded = cv2.imread(img_path, color)
+            if decoded is None:
+                logging.info('Read a blank image for the file: %s', img_path)
+                return
+            transformed = transformer(array(decoded))
+            payload = recordio.pack_img(header, transformed, quality=quality, img_fmt=encoding)
+        else:
+            with open(img_path, 'rb') as f_im:
+                raw_bytes = f_im.read()
+            payload = recordio.pack(header, raw_bytes)
+        # both paths hand the packed record over in the same tuple layout
+        q_out.put((idx, payload, entry))
+    except IOError:
+        logging.info('pack_img error on file: %s', img_path)
+        return
+    except AttributeError:
+        logging.info("Using this API requires OpenCV. Unable to load cv2.")
+
+def _validate_filenames(list_file, output_path):
+    """
+    Helper function to validate the file paths of
+    the input list file and output .rec file path.
+
+    Parameters
+    ----------
+    list_file : str
+        input list file path; must point to an existing file.
+    output_path : str
+        path to the output directory; must be an existing directory.
+
+    Raises
+    ------
+    ValueError
+        if ``list_file`` is not a file or ``output_path`` is not a directory.
+    """
+    # messages are built from adjacent literals so that no source
+    # indentation leaks into the text shown to the user
+    if not os.path.isfile(list_file):
+        raise ValueError("Input list file is invalid -\n"
+                         "1. Wrong filename or file path\n"
+                         "2. List file should be of format *.lst")
+    if not os.path.isdir(output_path):
+        raise ValueError("Output path should be a directory where the "
+                         "rec files will be stored.")
+
+def _count_elem(iter):
 
 Review comment:
   unused.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-mxnet] nswamy commented on a change in pull request #14503: API to create RecordIO files

Reply via email to