[incubator-mxnet] branch master updated: Add Gluon Transformer Crop (#14259)

skm Wed, 03 Apr 2019 16:16:10 -0700

This is an automated email from the ASF dual-hosted git repository.

skm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git



The following commit(s) were added to refs/heads/master by this push:
     new 5241c1b  Add Gluon Transformer Crop (#14259)
5241c1b is described below

commit 5241c1b5c1dc029d3796aed64f3740550928fafa
Author: Jake Lee <[email protected]>
AuthorDate: Thu Apr 4 07:15:28 2019 +0800

    Add Gluon Transformer Crop (#14259)
    
    * implement crop
    
    * add crop operator
    
    * fix for linter
    
    * add. backword and refactor the code
    
    * fix error namespace
    
    * fix the website build failure
    
    * start adding the unit test of backword
    
    * add unit test for backward
    
    * address the comment
    
    * add missing statement
    
    * fix the website error
    
    * fix the website building
    
    * add missing doc
---
 python/mxnet/gluon/data/vision/transforms.py    |  61 ++++++++
 python/mxnet/image/image.py                     |   2 +-
 src/operator/image/crop-inl.h                   | 190 ++++++++++++++++++++++++
 src/operator/image/crop.cc                      |  85 +++++++++++
 tests/python/unittest/test_gluon_data_vision.py |  75 +++++++++-
 5 files changed, 410 insertions(+), 3 deletions(-)

diff --git a/python/mxnet/gluon/data/vision/transforms.py 
b/python/mxnet/gluon/data/vision/transforms.py
index 9310e15..dff7f66 100644
--- a/python/mxnet/gluon/data/vision/transforms.py
+++ b/python/mxnet/gluon/data/vision/transforms.py
@@ -228,6 +228,67 @@ class RandomResizedCrop(Block):
         return image.random_size_crop(x, *self._args)[0]
 
 
+class CropResize(HybridBlock):
+    r"""Crop the input image and optionally resize it.
+
+    Makes a crop of the original image then optionally resize it to the 
specified size.
+
+    Parameters
+    ----------
+    x : int
+        Left boundary of the cropping area
+    y : int
+        Top boundary of the cropping area
+    width : int
+        Width of the cropping area
+    height : int
+        Height of the cropping area
+    size : int or tuple of (w, h)
+        Optional, resize to new size after cropping
+    interpolation : int, optional
+        Interpolation method for resizing. By default uses bilinear
+        interpolation. See OpenCV's resize function for available choices.
+        
https://docs.opencv.org/2.4/modules/imgproc/doc/geometric_transformations.html?highlight=resize#resize
+        Note that Resize on gpu uses the contrib.bilinearResize2D operator,
+        which only supports bilinear interpolation(1). The result would be slightly
+        different on gpu compared to cpu. OpenCV tends to align centers while bilinearResize2D
+        uses an algorithm which aligns corners.
+
+
+    Inputs:
+        - **data**: input tensor with (H x W x C) or (N x H x W x C) shape.
+
+    Outputs:
+        - **out**: output tensor with (H x W x C) or (N x H x W x C) shape.
+
+    Examples
+    --------
+    >>> transformer = vision.transforms.CropResize(x=0, y=0, width=100, 
height=100)
+    >>> image = mx.nd.random.uniform(0, 255, (224, 224, 
3)).astype(dtype=np.uint8)
+    >>> transformer(image)
+    <NDArray 100x100x3 @cpu(0)>
+    >>> image = mx.nd.random.uniform(0, 255, (3, 224, 224, 
3)).astype(dtype=np.uint8)
+    >>> transformer(image)
+    <NDArray 3x100x100x3 @cpu(0)>
+    >>> transformer = vision.transforms.CropResize(x=0, y=0, width=100, 
height=100, size=(50, 50), interpolation=1)
+    >>> transformer(image)
+    <NDArray 3x50x50x3 @cpu(0)>
+    """
+    def __init__(self, x, y, width, height, size=None, interpolation=None):
+        super(CropResize, self).__init__()
+        self._x = x
+        self._y = y
+        self._width = width
+        self._height = height
+        self._size = size
+        self._interpolation = interpolation
+
+    def hybrid_forward(self, F, x):
+        out = F.image.crop(x, self._x, self._y, self._width, self._height)
+        if self._size:
+            out = F.image.resize(out, self._size, False, self._interpolation)
+        return out
+
 class CenterCrop(Block):
     """Crops the image `src` to the given `size` by trimming on all four
     sides and preserving the center of the image. Upsamples if `src` is
diff --git a/python/mxnet/image/image.py b/python/mxnet/image/image.py
index 1dd6656..d2631e8 100644
--- a/python/mxnet/image/image.py
+++ b/python/mxnet/image/image.py
@@ -428,7 +428,7 @@ def fixed_crop(src, x0, y0, w, h, size=None, interp=2):
     NDArray
         An `NDArray` containing the cropped image.
     """
-    out = nd.crop(src, begin=(y0, x0, 0), end=(y0 + h, x0 + w, 
int(src.shape[2])))
+    out = nd.slice(src, begin=(y0, x0, 0), end=(y0 + h, x0 + w, 
int(src.shape[2])))
     if size is not None and (w, h) != size:
         sizes = (h, w, size[1], size[0])
         out = imresize(out, *size, interp=_get_interp_method(interp, sizes))
diff --git a/src/operator/image/crop-inl.h b/src/operator/image/crop-inl.h
new file mode 100644
index 0000000..a1a4b23
--- /dev/null
+++ b/src/operator/image/crop-inl.h
@@ -0,0 +1,190 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file crop-inl.h
+ * \brief the image crop operator implementation
+ */
+
+#ifndef MXNET_OPERATOR_IMAGE_CROP_INL_H_
+#define MXNET_OPERATOR_IMAGE_CROP_INL_H_
+
+
+#include <algorithm>
+#include <vector>
+
+#include "mxnet/base.h"
+#include "dmlc/optional.h"
+#include "image_utils.h"
+#include "../mxnet_op.h"
+#include "../operator_common.h"
+#include "../../common/static_array.h"
+#include "../tensor/matrix_op-inl.h"
+#include "resize-inl.h"
+
+namespace mxnet {
+namespace op {
+namespace image {
+
+struct CropParam : public dmlc::Parameter<CropParam> {
+  int x;
+  int y;
+  int width;
+  int height;
+  DMLC_DECLARE_PARAMETER(CropParam) {
+    DMLC_DECLARE_FIELD(x)
+    .describe("Left boundary of the cropping area.");
+    DMLC_DECLARE_FIELD(y)
+    .describe("Top boundary of the cropping area.");
+    DMLC_DECLARE_FIELD(width)
+    .describe("Width of the cropping area.");
+    DMLC_DECLARE_FIELD(height)
+    .describe("Height of the cropping area.");
+  }
+};
+
+inline bool CropShape(const nnvm::NodeAttrs& attrs,
+                             std::vector<TShape> *in_attrs,
+                             std::vector<TShape> *out_attrs) {
+  // input attrs should only be (h, w, c) or (n, h, w, c)
+  if (in_attrs->at(0).ndim() == 3U) {
+    CHECK((in_attrs->at(0)[2] == 1) || (in_attrs->at(0)[2] == 3))
+      << "Expect channel of the input image is 1 or 3, but got"
+      << in_attrs->at(0)[2];
+  } else if (in_attrs->at(0).ndim() == 4U) {
+    CHECK((in_attrs->at(0)[3] == 1) || (in_attrs->at(0)[3] == 3))
+      << "Expect channel of the input image is 1 or 3, but got"
+      << in_attrs->at(0)[3];
+  } else {
+    LOG(FATAL) << "Image Crop expects inputs of 3D (h, w, c) or 4D (n, h, w, 
c). But got "
+      << in_attrs->at(0).ndim();
+  }
+
+  const auto& ishape = (*in_attrs)[0];
+  const CropParam& param = nnvm::get<CropParam>(attrs.parsed);
+
+  CHECK((param.height > 0) && (param.width > 0))
+    << "Input height and width must be greater than 0";
+  CHECK(param.x + param.width <= ishape[ishape.ndim() - 2])
+    << " x + width should not be greater than input width";
+  CHECK(param.y + param.height <= ishape[ishape.ndim() - 3])
+    << " y + height should not be greater than input height";
+  if (ishape.ndim() == 3) {
+    SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape({param.height, param.width, 
ishape[C]}));
+  } else {
+    SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape({ishape[N], param.height, 
param.width, ishape[kC]}));
+  }
+  return true;
+}
+
+inline void CropImpl(int x,
+                      int y,
+                      int width,
+                      int height,
+                      const std::vector<TBlob> &inputs,
+                      const std::vector<TBlob> &outputs,
+                      const OpContext &ctx,
+                      const std::vector<OpReqType> &req) {
+  using namespace mshadow;
+  const TBlob& data = inputs[0];
+  const TBlob& out = outputs[0];
+  MXNET_NDIM_SWITCH(data.ndim(), ndim, {
+    Stream<cpu>* s = ctx.get_stream<cpu>();
+    common::StaticArray<index_t, ndim> begin = {0}, step = {1};
+    if (ndim == 3) {
+      begin[0] = y;
+      begin[1] = x;
+    } else {
+      begin[1] = y;
+      begin[2] = x;
+    }
+    MSHADOW_TYPE_SWITCH(out.type_flag_, DType, {
+      MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
+        size_t num_threads = out.shape_.FlatTo2D()[0];
+        mxnet_op::Kernel<slice_forward<ndim, Req, cpu>, cpu>::Launch(s, 
num_threads,
+          out.dptr<DType>(), data.dptr<DType>(),
+          data.shape_.get<ndim>(), out.shape_.get<ndim>(), begin, step);
+      })
+    })
+  })
+}
+
+inline void CropBackwardImpl(int x,
+                      int y,
+                      int width,
+                      int height,
+                      const std::vector<TBlob> &inputs,
+                      const std::vector<TBlob> &outputs,
+                      const OpContext &ctx,
+                      const std::vector<OpReqType> &req) {
+  using namespace mshadow;
+  if (req[0] == kNullOp) return;
+  const TBlob& output_grad = inputs[0];
+  const TBlob& input_grad = outputs[0];
+  Stream<cpu>* s = ctx.get_stream<cpu>();
+  if (req[0] == kWriteTo) {
+    Fill(s, input_grad, req[0], 0);
+  } else if (req[0] == kWriteInplace) {
+    LOG(FATAL) << "_backward_image_crop does not support kWriteInplace";
+  }
+  MXNET_NDIM_SWITCH(output_grad.ndim(), ndim, {
+    common::StaticArray<index_t, ndim> begin = {0}, step = {1};
+    if (ndim == 3) {
+      begin[0] = y;
+      begin[1] = x;
+    } else {
+      begin[1] = y;
+      begin[2] = x;
+    }
+    MSHADOW_TYPE_SWITCH(output_grad.type_flag_, DType, {
+      MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
+        size_t num_threads = output_grad.shape_.FlatTo2D()[0];
+        mxnet_op::Kernel<slice_assign<ndim, Req, cpu>, cpu>::Launch(s, 
num_threads,
+          input_grad.dptr<DType>(), output_grad.dptr<DType>(),
+          input_grad.shape_.get<ndim>(), output_grad.shape_.get<ndim>(), 
begin, step);
+      })
+    })
+  })
+}
+
+inline void CropOpForward(const nnvm::NodeAttrs &attrs,
+                   const OpContext &ctx,
+                   const std::vector<TBlob> &inputs,
+                   const std::vector<OpReqType> &req,
+                   const std::vector<TBlob> &outputs) {
+  CHECK_EQ(outputs.size(), 1U);
+  const CropParam& param = nnvm::get<CropParam>(attrs.parsed);
+  CropImpl(param.x, param.y, param.width, param.height, inputs, outputs, ctx, 
req);
+}
+
+inline void CropOpBackward(const nnvm::NodeAttrs &attrs,
+                   const OpContext &ctx,
+                   const std::vector<TBlob> &inputs,
+                   const std::vector<OpReqType> &req,
+                   const std::vector<TBlob> &outputs) {
+  CHECK_EQ(outputs.size(), 1U);
+  const CropParam& param = nnvm::get<CropParam>(attrs.parsed);
+  CropBackwardImpl(param.x, param.y, param.width, param.height, inputs, 
outputs, ctx, req);
+}
+}  // namespace image
+}  // namespace op
+}  // namespace mxnet
+
+#endif  // MXNET_OPERATOR_IMAGE_CROP_INL_H_
diff --git a/src/operator/image/crop.cc b/src/operator/image/crop.cc
new file mode 100644
index 0000000..52d2f11
--- /dev/null
+++ b/src/operator/image/crop.cc
@@ -0,0 +1,85 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file crop.cc
+ * \brief the image crop operator registration
+ */
+
+#include "mxnet/base.h"
+#include "crop-inl.h"
+#include "../operator_common.h"
+#include "../elemwise_op_common.h"
+
+namespace mxnet {
+namespace op {
+namespace image {
+
+DMLC_REGISTER_PARAMETER(CropParam);
+
+NNVM_REGISTER_OP(_image_crop)
+.describe(R"code(Crop an image NDArray of shape (H x W x C) or (N x H x W x C) 
+to the given size.
+Example:
+    .. code-block:: python
+        image = mx.nd.random.uniform(0, 255, (4, 2, 3)).astype(dtype=np.uint8)
+        mx.nd.image.crop(image, 1, 1, 2, 2)
+            [[[144  34   4]
+              [ 82 157  38]]
+
+             [[156 111 230]
+              [177  25  15]]]
+            <NDArray 2x2x3 @cpu(0)>
+        image = mx.nd.random.uniform(0, 255, (2, 4, 2, 
3)).astype(dtype=np.uint8)
+        mx.nd.image.crop(image, 1, 1, 2, 2)            
+            [[[[ 35 198  50]
+               [242  94 168]]
+
+              [[223 119 129]
+               [249  14 154]]]
+
+
+              [[[137 215 106]
+                [ 79 174 133]]
+
+               [[116 142 109]
+                [ 35 239  50]]]]
+            <NDArray 2x2x2x3 @cpu(0)>
+)code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<CropParam>)
+.set_attr<mxnet::FInferShape>("FInferShape", CropShape)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute<cpu>", CropOpForward)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ 
"_backward_image_crop" })
+.add_argument("data", "NDArray-or-Symbol", "The input.")
+.add_arguments(CropParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_backward_image_crop)
+.set_attr_parser(ParamParser<CropParam>)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<FCompute>("FCompute<cpu>", CropOpBackward);
+
+}  // namespace image
+}  // namespace op
+}  // namespace mxnet
diff --git a/tests/python/unittest/test_gluon_data_vision.py 
b/tests/python/unittest/test_gluon_data_vision.py
index a855fc8..cc15bec 100644
--- a/tests/python/unittest/test_gluon_data_vision.py
+++ b/tests/python/unittest/test_gluon_data_vision.py
@@ -15,14 +15,16 @@
 # specific language governing permissions and limitations
 # under the License.
 from __future__ import print_function
+from collections import namedtuple
+
 import mxnet as mx
 import mxnet.ndarray as nd
 from mxnet.base import MXNetError
 from mxnet import gluon
 from mxnet.base import MXNetError
 from mxnet.gluon.data.vision import transforms
-from mxnet.test_utils import assert_almost_equal
-from mxnet.test_utils import almost_equal
+from mxnet import image
+from mxnet.test_utils import *
 from common import assertRaises, setup_module, with_seed, teardown
 
 import numpy as np
@@ -119,6 +121,75 @@ def test_resize():
 
 
 @with_seed()
+def test_crop_resize():
+    def _test_crop_resize_with_diff_type(dtype):
+        # test normal case
+        data_in = nd.arange(60).reshape((5, 4, 3)).astype(dtype)
+        out_nd = transforms.CropResize(0, 0, 3, 2)(data_in)
+        out_np = out_nd.asnumpy()
+        assert(out_np.sum() == 180)
+        assert((out_np[0:2,1,1].flatten() == [4, 16]).all())
+        # test 4D input
+        data_bath_in = nd.arange(180).reshape((2, 6, 5, 3)).astype(dtype)
+        out_batch_nd = transforms.CropResize(1, 2, 3, 4)(data_bath_in)
+        out_batch_np = out_batch_nd.asnumpy()
+        assert(out_batch_np.sum() == 7524)
+        assert((out_batch_np[0:2,0:4,1,1].flatten() == [37,  52,  67,  82, 
127, 142, 157, 172]).all())
+        # test normal case with resize
+        data_in = nd.random.uniform(0, 255, (300, 200, 3)).astype(dtype)
+        out_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 2)(data_in)
+        data_expected = image.imresize(nd.slice(data_in, (0, 0, 0), (50, 100 , 
3)), 25, 25, 2)
+        assert_almost_equal(out_nd.asnumpy(), data_expected.asnumpy())
+        # test 4D input with resize
+        data_bath_in = nd.random.uniform(0, 255, (3, 300, 200, 
3)).astype(dtype)
+        out_batch_nd = transforms.CropResize(0, 0, 100, 50, (25, 25), 
2)(data_bath_in)
+        for i in range(len(out_batch_nd)):
+            assert_almost_equal(image.imresize(nd.slice(data_bath_in[i], (0, 
0, 0), (50, 100, 3)), 25, 25, 2).asnumpy(),
+                out_batch_nd[i].asnumpy())
+        # test with resize height and width should be greater than 0
+        transformer = transforms.CropResize(0, 0, 100, 50, (-25, 25), 2)
+        assertRaises(MXNetError, transformer, data_in)
+        # test height and width should be greater than 0 
+        transformer = transforms.CropResize(0, 0, -100, -50)
+        assertRaises(MXNetError, transformer, data_in)
+        # test cropped area is bigger than input data
+        transformer = transforms.CropResize(150, 200, 200, 500)
+        assertRaises(MXNetError, transformer, data_in)
+        assertRaises(MXNetError, transformer, data_bath_in)
+
+    for dtype in ['uint8', 'float32', 'float64']:
+        _test_crop_resize_with_diff_type(dtype)  
+
+    # test nd.image.crop backward
+    def test_crop_backward(test_nd_arr, TestCase):
+        a_np = test_nd_arr.asnumpy()
+        b_np = a_np[(slice(TestCase.y, TestCase.y + TestCase.height), 
slice(TestCase.x, TestCase.x + TestCase.width), slice(0, 3))]
+
+        data = mx.sym.Variable('data')
+        crop_sym = mx.sym.image.crop(data, TestCase.x, TestCase.y, 
TestCase.width, TestCase.height)
+
+        expected_in_grad = np.zeros_like(a_np)
+        expected_in_grad[(slice(TestCase.y, TestCase.y + TestCase.height), 
slice(TestCase.x, TestCase.x + TestCase.width), slice(0, 3))] = b_np
+        check_symbolic_backward(crop_sym, [a_np], [b_np], [expected_in_grad])
+
+    TestCase = namedtuple('TestCase', ['x', 'y', 'width', 'height'])
+    test_list = [TestCase(0, 0, 3, 3), TestCase(2, 1, 1, 2), TestCase(0, 1, 3, 
2)]
+
+    for dtype in ['uint8', 'float32', 'float64']:
+        data_in = nd.arange(60).reshape((5, 4, 3)).astype(dtype)
+        for test_case in test_list:
+            test_crop_backward(data_in, test_case)
+        
+
+
+    # check numeric gradient of nd.image.crop
+    # in_data = np.arange(36).reshape(3, 4, 3)
+    # data = mx.sym.Variable('data')
+    # image_crop_sym = mx.sym.image.crop(data, 0, 0, 2, 2)
+    # check_numeric_gradient(image_crop_sym, [in_data])
+
+
+@with_seed()
 def test_flip_left_right():
     data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
     flip_in = data_in[:, ::-1, :]

[incubator-mxnet] branch master updated: Add Gluon Transformer Crop (#14259)

Reply via email to