piiswrong closed pull request #9740: [MXNET-235] add axis support and gradient for L2norm
URL: https://github.com/apache/incubator-mxnet/pull/9740
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:


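For orientation before the patch itself, here is a minimal usage sketch of the behaviour being added (an `ord`/`axis`/`keepdims` interface on `norm`, plus per-axis norm on CSR input), mirroring the new unit tests near the end of the diff. It is an illustration only, not part of the patch, and the `tostype('csr')` conversion is assumed from the existing sparse NDArray API:

    import mxnet as mx
    import numpy as np

    x = mx.nd.array(np.random.uniform(size=(3, 3, 3)))

    # default: L2 norm over all elements, returned as a (1,)-shaped array
    whole = mx.nd.norm(x)

    # new in this PR: reduce over a single axis or a 2-tuple of axes
    per_axis = mx.nd.norm(x, axis=1)
    per_mat = mx.nd.norm(x, axis=(1, 2), keepdims=True)

    # CSR input: norm along axis 0 or 1 (dense output), as exercised in the tests
    csr = mx.nd.array(np.random.uniform(size=(5, 17))).tostype('csr')
    col_norms = mx.nd.norm(csr, axis=0)
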
diff --git a/src/operator/tensor/broadcast_reduce_op.h b/src/operator/tensor/broadcast_reduce_op.h
index f124ba3021e..2f6864b4869 100644
--- a/src/operator/tensor/broadcast_reduce_op.h
+++ b/src/operator/tensor/broadcast_reduce_op.h
@@ -64,6 +64,26 @@ struct ReduceAxesParam : public dmlc::Parameter<ReduceAxesParam> {
   }
 };
 
+struct NormParam : public dmlc::Parameter<NormParam> {
+  int ord;
+  TShape axis;
+  bool keepdims;
+  DMLC_DECLARE_PARAMETER(NormParam) {
+    DMLC_DECLARE_FIELD(ord).set_default(2)
+      .describe("Order of the norm. Currently ord=2 is supported.");
+    DMLC_DECLARE_FIELD(axis).set_default(TShape())
+      .describe(R"code(The axis or axes along which to perform the reduction.
+      The default, `axis=()`, will compute over all elements into a
+      scalar array with shape `(1,)`.
+      If `axis` is int, a reduction is performed on a particular axis.
+      If `axis` is a 2-tuple, it specifies the axes that hold 2-D matrices,
+      and the matrix norms of these matrices are computed.)code");
+    DMLC_DECLARE_FIELD(keepdims).set_default(false)
+      .describe("If this is set to `True`, the reduced axis is left "
+                "in the result as dimension with size one.");
+  }
+};
+
 struct ReduceAxisParam : public dmlc::Parameter<ReduceAxisParam> {
   dmlc::optional<int> axis;
   bool keepdims;
@@ -258,6 +278,19 @@ inline bool ReduceAxesShape(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
+inline bool NormShape(const nnvm::NodeAttrs& attrs,
+                      std::vector<TShape> *in_attrs,
+                      std::vector<TShape> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  if ((*in_attrs)[0].ndim() == 0) return false;
+  const NormParam& param = nnvm::get<NormParam>(attrs.parsed);
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0,
+                     ReduceAxesShapeImpl((*in_attrs)[0], param.axis,
+                                         param.keepdims, false));
+  return true;
+}
+
 inline bool BroadcastAxesShape(const nnvm::NodeAttrs& attrs,
                                std::vector<TShape> *in_attrs,
                                std::vector<TShape> *out_attrs) {
@@ -403,9 +436,9 @@ void SearchAxisCompute(const nnvm::NodeAttrs& attrs,
   });
 }
 
-template<typename xpu, typename reducer, bool normalize = false>
-void ReduceAxesComputeImpl(const nnvm::NodeAttrs& attrs,
-                           const OpContext& ctx,
+template<typename xpu, typename reducer, bool normalize = false,
+         typename OP = op::mshadow_op::identity>
+void ReduceAxesComputeImpl(const OpContext& ctx,
                            const std::vector<TBlob>& inputs,
                            const std::vector<OpReqType>& req,
                            const std::vector<TBlob>& outputs,
@@ -424,7 +457,7 @@ void ReduceAxesComputeImpl(const nnvm::NodeAttrs& attrs,
           s, out_data.shape_, req[0], in_data.shape_);
       Tensor<xpu, 1, char> workspace =
          ctx.requested[0].get_space_typed<xpu, 1, char>(Shape1(workspace_size), s);
-      broadcast::Reduce<reducer, NDim, DType, op::mshadow_op::identity>(
+      broadcast::Reduce<reducer, NDim, DType, OP>(
           s, out_data, req[0], workspace, in_data);
       if (normalize) {
         auto out = out_data.FlatTo2D<xpu, DType>(s);
@@ -434,7 +467,8 @@ void ReduceAxesComputeImpl(const nnvm::NodeAttrs& attrs,
   });
 }
 
-template<typename xpu, typename reducer, bool normalize = false>
+template<typename xpu, typename reducer, bool normalize = false,
+         typename OP = op::mshadow_op::identity>
 void ReduceAxesCompute(const nnvm::NodeAttrs& attrs,
                        const OpContext& ctx,
                        const std::vector<TBlob>& inputs,
@@ -448,13 +482,13 @@ void ReduceAxesCompute(const nnvm::NodeAttrs& attrs,
     small = ReduceAxesShapeImpl(inputs[0].shape_, param.axis, true, param.exclude);
   }
 
-  ReduceAxesComputeImpl<xpu, reducer, normalize>(attrs, ctx, inputs, req, outputs, small);
+  ReduceAxesComputeImpl<xpu, reducer, normalize, OP>(ctx, inputs, req, outputs, small);
 }
 
-template <int req, int axis>
-struct SumCsrKernel;
+template <typename red_op, int req, int axis>
+struct ReduceCsrKernel;
 
-template <int req>
+template <typename red_op, int req>
 /* \brief The number of columns are divided equally among the number of threads
  * available.
  * Each thread gets a subset of columns. It iterates through all rows for the
@@ -466,7 +500,7 @@ template <int req>
  * to the intermediate sum. At the end of iteration through all
  * rows we have the sum along the axis for the subset of columns.
  */
-struct SumCsrKernel<req, 0> {
+struct ReduceCsrKernel<red_op, req, 0> {
   template <typename RType, typename IType, typename DType>
   MSHADOW_XINLINE static void Map(int j, DType* out_data,
                                   const RType* in_indptr, const IType* in_idx,
@@ -530,8 +564,7 @@ struct SumCsrKernel<req, 0> {
       for (IType col = row_seg_start;
            col < row_seg_end && mid <= row_indptr_end;) {
         if (col == in_idx[mid]) {
-          mshadow::red::sum::Reduce(sum[col], in_data[mid],
-                                   residual[col]);
+          red_op::Reduce(sum[col], in_data[mid], residual[col]);
           mid++;
           col++;
         } else if (in_idx[mid] < col) {
@@ -548,34 +581,28 @@ struct SumCsrKernel<req, 0> {
   }
 };
 
-template <int req>
-struct SumCsrKernel<req, 1> {
+template <typename red_op, int req>
+struct ReduceCsrKernel<red_op, req, 1> {
   template <typename RType, typename DType>
   MSHADOW_XINLINE static void Map(int i, DType* out_data,
                                   const RType* in_indptr,
                                   const DType* in_data) {
     DType sum, residual;
-    mshadow::red::sum::SetInitValue(sum, residual);
+    red_op::SetInitValue(sum, residual);
     for (RType k = in_indptr[i]; k < in_indptr[i + 1]; k++) {
-      mshadow::red::sum::Reduce(sum, in_data[k], residual);
+      red_op::Reduce(sum, in_data[k], residual);
     }
     KERNEL_ASSIGN(out_data[i], req, sum);
   }
 };
 
-/*! \brief If normalize is true, the mean should be computed instead of sum */
-template <typename xpu, bool normalize = false>
-void SumCsrImpl(const nnvm::NodeAttrs& attrs, mshadow::Stream<xpu>* s, const OpContext& ctx,
-                const NDArray& input, const OpReqType req, NDArray* output) {
+template <typename xpu, typename red_op, bool normalize = false>
+void ReduceCsrImpl(mshadow::Stream<xpu>* s, const OpContext& ctx,
+                   const NDArray& input, const OpReqType req,
+                   NDArray* output, const TShape reduce_axis) {
   if (req == kNullOp) return;
-  const ReduceAxesParam& param = nnvm::get<ReduceAxesParam>(attrs.parsed);
-  CHECK_EQ(param.axis.ndim(), 1U) << "sum(csr)/mean(csr) only supports axis 0 or 1";
-  CHECK(param.axis[0] == 0 || param.axis[0] == 1)
-      << "sum(csr)/mean(csr) only support axis 0 or 1";
-  CHECK(!param.keepdims) << "keepdims not supported for sparse";
-  CHECK(!param.exclude) << "exclude not supported for sparse";
   int64_t out_data_size = 0;
-  if (param.axis[0] == 0) {
+  if (reduce_axis[0] == 0) {
     out_data_size = input.shape()[1];
   } else {
     out_data_size = input.shape()[0];
@@ -600,7 +627,7 @@ void SumCsrImpl(const nnvm::NodeAttrs& attrs, mshadow::Stream<xpu>* s, const OpC
     return;
   }
 
-  if (0 == param.axis[0]) {
+  if (0 == reduce_axis[0]) {
     MSHADOW_IDX_TYPE_SWITCH(input.aux_type(kIndPtr), RType, {
       MSHADOW_IDX_TYPE_SWITCH(input.aux_type(kIdx), IType, {
         MSHADOW_TYPE_SWITCH(input.dtype(), DType, {
@@ -625,7 +652,7 @@ void SumCsrImpl(const nnvm::NodeAttrs& attrs, mshadow::Stream<xpu>* s, const OpC
 
             Kernel<set_zero, xpu>::Launch(s, out_data_size, sum.dptr_);
             Kernel<set_zero, xpu>::Launch(s, out_data_size, residual.dptr_);
-            Kernel<SumCsrKernel<req_type, 0>, xpu>::Launch(
+            Kernel<ReduceCsrKernel<red_op, req_type, 0>, xpu>::Launch(
                s, num_threads, output->data().dptr<DType>(), in_indptr, in_idx,
                 in_data, sum.dptr_, residual.dptr_, num_rows, num_cols,
                 seg_len);
@@ -639,7 +666,7 @@ void SumCsrImpl(const nnvm::NodeAttrs& attrs, mshadow::Stream<xpu>* s, const OpC
         });
       });
     });
-  } else if (1 == param.axis[0]) {
+  } else if (1 == reduce_axis[0]) {
     MSHADOW_IDX_TYPE_SWITCH(input.aux_type(kIndPtr), RType, {
       MSHADOW_IDX_TYPE_SWITCH(input.aux_type(kIdx), IType, {
         MSHADOW_TYPE_SWITCH(input.dtype(), DType, {
@@ -647,7 +674,7 @@ void SumCsrImpl(const nnvm::NodeAttrs& attrs, mshadow::Stream<xpu>* s, const OpC
             const RType* in_indptr = input.aux_data(kIndPtr).dptr<RType>();
             const DType* in_data = input.data().dptr<DType>();
             const IType num_cols = input.shape()[1];
-            Kernel<SumCsrKernel<req_type, 1>, xpu>::Launch(
+            Kernel<ReduceCsrKernel<red_op, req_type, 1>, xpu>::Launch(
                 s, out_data_size, output->data().dptr<DType>(), in_indptr,
                 in_data);
             if (normalize) {
@@ -663,6 +690,19 @@ void SumCsrImpl(const nnvm::NodeAttrs& attrs, mshadow::Stream<xpu>* s, const OpC
   }
 }
 
+/*! \brief If normalize is true, the mean should be computed instead of sum */
+template <typename xpu, typename red_op, bool normalize = false>
+void ReduceCsr(const nnvm::NodeAttrs& attrs, mshadow::Stream<xpu>* s, const OpContext& ctx,
+               const NDArray& input, const OpReqType req, NDArray* output) {
+  const ReduceAxesParam& param = nnvm::get<ReduceAxesParam>(attrs.parsed);
+  CHECK_EQ(param.axis.ndim(), 1U) << "sum(csr)/mean(csr) only supports axis 0 or 1";
+  CHECK(param.axis[0] == 0 || param.axis[0] == 1)
+      << "sum(csr)/mean(csr) only support axis 0 or 1";
+  CHECK(!param.keepdims) << "keepdims not supported for sparse";
+  CHECK(!param.exclude) << "exclude not supported for sparse";
+  ReduceCsrImpl<xpu, red_op, normalize>(s, ctx, input, req, output, param.axis);
+}
+
 template <typename xpu, typename reducer, bool normalize = false>
 void SumOpForwardEx(const nnvm::NodeAttrs& attrs, const OpContext& ctx,
                     const std::vector<NDArray>& inputs,
@@ -677,28 +717,21 @@ void SumOpForwardEx(const nnvm::NodeAttrs& attrs, const OpContext& ctx,
     CHECK_EQ(inputs[0].shape().ndim(), 2U)
         << "sum(csr)/mean(csr) op only supports 2D ndarray as input";
     NDArray output = outputs[0];
-    SumCsrImpl<xpu, normalize>(attrs, s, ctx, inputs[0], req[0], &output);
+    ReduceCsr<xpu, mshadow::red::sum, normalize>(attrs, s, ctx, inputs[0],
+                                                 req[0], &output);
   } else {
     LogUnimplementedOp(attrs, ctx, inputs, req, outputs);
   }
 }
 
-// works when shape inference of output is given
 template<typename xpu, typename OP, bool normalize = false>
-void ReduceAxesBackwardUseInOut(const nnvm::NodeAttrs& attrs,
-                                const OpContext& ctx,
-                                const std::vector<TBlob>& inputs,
-                                const std::vector<OpReqType>& req,
-                                const std::vector<TBlob>& outputs) {
+void ReduceAxesBackwardUseInOutImpl(const OpContext& ctx,
+                                    const TShape &small,
+                                    const std::vector<TBlob>& inputs,
+                                    const std::vector<OpReqType>& req,
+                                    const std::vector<TBlob>& outputs) {
   using namespace mshadow;
   using namespace mshadow::expr;
-  const ReduceAxesParam& param = nnvm::get<ReduceAxesParam>(attrs.parsed);
-  TShape small;
-  if (param.keepdims) {
-    small = inputs[0].shape_;
-  } else {
-    small = ReduceAxesShapeImpl(outputs[0].shape_, param.axis, true, param.exclude);
-  }
 
   TShape src_shape, dst_shape;
   BroadcastReduceShapeCompact(outputs[0].shape_, small, &src_shape, &dst_shape);
@@ -733,6 +766,25 @@ void ReduceAxesBackwardUseInOut(const nnvm::NodeAttrs& attrs,
   });
 }
 
+// works when shape inference of output is given
+template<typename xpu, typename OP, bool normalize = false>
+void ReduceAxesBackwardUseInOut(const nnvm::NodeAttrs& attrs,
+                                const OpContext& ctx,
+                                const std::vector<TBlob>& inputs,
+                                const std::vector<OpReqType>& req,
+                                const std::vector<TBlob>& outputs) {
+  using namespace mshadow;
+  using namespace mshadow::expr;
+  const ReduceAxesParam& param = nnvm::get<ReduceAxesParam>(attrs.parsed);
+  TShape small;
+  if (param.keepdims) {
+    small = inputs[0].shape_;
+  } else {
+    small = ReduceAxesShapeImpl(outputs[0].shape_, param.axis, true, param.exclude);
+  }
+  ReduceAxesBackwardUseInOutImpl<xpu, OP, normalize>(ctx, small, inputs, req, outputs);
+}
+
 template<typename xpu>
 inline void BroadcastComputeImpl(const nnvm::NodeAttrs& attrs,
                                  const OpContext& ctx,
@@ -842,6 +894,47 @@ inline bool L2NormStorageType(const nnvm::NodeAttrs& attrs,
   return dispatched;
 }
 
+/*! \brief compute square on each element and sum reducer */
+struct sq_sum {
+  /*! \brief do reduction into dst */
+  template<typename DType>
+  MSHADOW_XINLINE static void Reduce(volatile DType& dst,  volatile DType src) { // NOLINT(*)
+    dst += src * src;
+  }
+  /*! \brief do stable reduction into dst */
+  template<typename DType>
+  MSHADOW_XINLINE static void Reduce(volatile DType& dst,  volatile DType src, volatile DType& residual) { // NOLINT(*)
+    DType y = src * src - residual;
+    DType t = dst + y;
+    residual = (t - dst) - y;
+    dst = t;
+  }
+  /*!
+   *\brief calculate gradient of redres with respect to redsrc,
+   * redres: reduced result, redsrc: one of reduction element
+   */
+  template<typename DType>
+  MSHADOW_XINLINE static DType PartialGrad(DType redres, DType redsrc) {
+    // This won't be called in backward.
+    return 1;
+  }
+  /*!
+   *\brief set the initial value during reduction
+   */
+  template<typename DType>
+  MSHADOW_XINLINE static void SetInitValue(DType &initv) { // NOLINT(*)
+    initv = 0;
+  }
+  /*!
+   *\brief set the initial value during reduction
+   */
+  template<typename DType>
+  MSHADOW_XINLINE static void SetInitValue(DType &initv, DType &residual) { // NOLINT(*)
+    SetInitValue(initv);
+    residual = 0;
+  }
+};
+
 template<typename xpu>
 void L2NormComputeImpl(mshadow::Stream<xpu> *s,
                        const TBlob& input,
@@ -862,6 +955,18 @@ void L2NormComputeImpl(mshadow::Stream<xpu> *s,
   });
 }
 
+template<typename xpu>
+void SqRootForL2(const OpContext& ctx, OpReqType req, const TBlob &output) {
+  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
+  MSHADOW_REAL_TYPE_SWITCH(output.type_flag_, DType, {
+    MXNET_ASSIGN_REQ_SWITCH(req, Req, {
+      DType* out_data = output.dptr<DType>();
+      using namespace mxnet_op;
+      Kernel<op_with_req<mshadow_op::square_root, Req>, xpu>::Launch(
+        s, output.Size(), out_data, out_data);
+    });
+  });
+}
 
 template<typename xpu>
 void L2NormCompute(const nnvm::NodeAttrs& attrs,
@@ -869,8 +974,40 @@ void L2NormCompute(const nnvm::NodeAttrs& attrs,
                    const std::vector<TBlob>& inputs,
                    const std::vector<OpReqType>& req,
                    const std::vector<TBlob>& outputs) {
-  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
-  L2NormComputeImpl(s, inputs[0], req[0], outputs[0]);
+  const NormParam& param = nnvm::get<NormParam>(attrs.parsed);
+  CHECK_EQ(param.ord, 2) << "norm only support ord=2";
+  if (req[0] == kNullOp) return;
+
+  TShape small;
+  if (param.keepdims) {
+    small = outputs[0].shape_;
+  } else {
+    small = ReduceAxesShapeImpl(inputs[0].shape_, param.axis, true, false);
+  }
+  ReduceAxesComputeImpl<xpu, mshadow::red::sum, false, mshadow_op::square>(
+      ctx, inputs, req, outputs, small);
+  SqRootForL2<xpu>(ctx, req[0], outputs[0]);
+}
+
+template<typename xpu>
+void L2NormGradCompute(const nnvm::NodeAttrs& attrs,
+                       const OpContext& ctx,
+                       const std::vector<TBlob>& inputs,
+                       const std::vector<OpReqType>& req,
+                       const std::vector<TBlob>& outputs) {
+  using namespace mshadow;
+  using namespace mshadow::expr;
+  if (req[0] == kNullOp) return;
+
+  const ReduceAxesParam& param = nnvm::get<ReduceAxesParam>(attrs.parsed);
+  TShape small;
+  if (param.keepdims) {
+    small = inputs[0].shape_;
+  } else {
+    small = ReduceAxesShapeImpl(outputs[0].shape_, param.axis, true, param.exclude);
+  }
+  ReduceAxesBackwardUseInOutImpl<xpu, mshadow_op::div, false>(ctx, small, inputs,
+                                                              req, outputs);
 }
 
 template<typename xpu>
@@ -898,15 +1035,40 @@ void L2NormComputeEx(const nnvm::NodeAttrs& attrs,
   CHECK_EQ(inputs.size(), 1U);
   CHECK_EQ(outputs.size(), 1U);
   CHECK_EQ(req.size(), 1U);
+  const NormParam& param = nnvm::get<NormParam>(attrs.parsed);
+  CHECK_EQ(param.ord, 2) << "norm only support ord=2";
   mshadow::Stream<xpu>* s = ctx.get_stream<xpu>();
-  const NDArrayStorageType in_stype = inputs[0].storage_type();
-  if (in_stype == kCSRStorage || in_stype == kRowSparseStorage) {
-    L2NormComputeSparseImpl(s, inputs[0], req[0], outputs[0].data());
+  const NDArrayStorageType istype = inputs[0].storage_type();
+  if ((istype == kRowSparseStorage || istype == kCSRStorage)
+      && param.axis.ndim() == 0) {
+    // We only support norm on the entire array for now.
+    L2NormComputeSparseImpl<xpu>(s, inputs[0], req[0], outputs[0].data());
+
+  } else if (istype == kCSRStorage) {
+    CHECK_EQ(inputs[0].shape().ndim(), 2U)
+        << "norm(csr) op only supports 2D ndarray as input";
+    CHECK_EQ(param.axis.ndim(), 1U) << "sum(csr)/mean(csr) only supports axis 0 or 1";
+    CHECK(param.axis[0] == 0 || param.axis[0] == 1)
+        << "sum(csr)/mean(csr) only support axis 0 or 1";
+    CHECK(!param.keepdims) << "keepdims not supported for sparse";
+    NDArray output = outputs[0];
+    ReduceCsrImpl<xpu, sq_sum, false>(s, ctx, inputs[0], req[0], &output, param.axis);
+    CHECK_EQ(outputs[0].storage_type(), kDefaultStorage);
+    SqRootForL2<xpu>(ctx, req[0], outputs[0].data());
   } else {
     LogUnimplementedOp(attrs, ctx, inputs, req, outputs);
   }
 }
 
+template<typename xpu>
+void L2NormGradComputeEx(const nnvm::NodeAttrs& attrs,
+                         const OpContext& ctx,
+                         const std::vector<NDArray>& inputs,
+                         const std::vector<OpReqType>& req,
+                         const std::vector<NDArray>& outputs) {
+  LogUnimplementedOp(attrs, ctx, inputs, req, outputs);
+}
+
 /*! \brief index element from array along axes */
 template<int ndim>
 struct pick {
diff --git a/src/operator/tensor/broadcast_reduce_op_value.cc b/src/operator/tensor/broadcast_reduce_op_value.cc
index 40624e54ab4..da1d0350242 100644
--- a/src/operator/tensor/broadcast_reduce_op_value.cc
+++ b/src/operator/tensor/broadcast_reduce_op_value.cc
@@ -27,6 +27,7 @@
 namespace mxnet {
 namespace op {
 DMLC_REGISTER_PARAMETER(ReduceAxesParam);
+DMLC_REGISTER_PARAMETER(NormParam);
 DMLC_REGISTER_PARAMETER(ReduceAxisParam);
 DMLC_REGISTER_PARAMETER(BroadcastAxesParam);
 DMLC_REGISTER_PARAMETER(BroadcastToParam);
@@ -246,7 +247,11 @@ NNVM_REGISTER_OP(_broadcast_backward)
 
 NNVM_REGISTER_OP(norm)
 MXNET_ADD_SPARSE_OP_ALIAS(norm)
-.describe(R"code(Flattens the input array and then computes the l2 norm.
+.describe(R"code(Computes the norm on an NDArray.
+
+This operator computes the norm on an NDArray with the specified axis, depending
+on the value of the ord parameter. By default, it computes the L2 norm on the entire
+array.
 
 Examples::
 
@@ -266,21 +271,29 @@ Examples::
 )code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
-.set_attr<nnvm::FInferShape>("FInferShape",
-  [](const nnvm::NodeAttrs& attrs,
-     std::vector<TShape> *in_attrs,
-     std::vector<TShape> *out_attrs) {
-    CHECK_EQ(in_attrs->size(), 1U);
-    CHECK_EQ(out_attrs->size(), 1U);
-    if ((*in_attrs)[0].ndim() == 0) return false;
-    SHAPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::Shape1(1));
-    return true;
-  })
+.set_attr_parser(ParamParser<NormParam>)
+.set_attr<nnvm::FInferShape>("FInferShape", NormShape)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<FInferStorageType>("FInferStorageType", L2NormStorageType)
+.set_attr<nnvm::FGradient>("FGradient", ReduceGrad{ "_backward_norm" })
+.set_attr<FResourceRequest>("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+  })
 .set_attr<FCompute>("FCompute<cpu>", L2NormCompute<cpu>)
 .set_attr<FComputeEx>("FComputeEx<cpu>", L2NormComputeEx<cpu>)
-.add_argument("data", "NDArray-or-Symbol", "Source input");
+.add_argument("data", "NDArray-or-Symbol", "The input")
+.add_arguments(NormParam::__FIELDS__());
+
+MXNET_OPERATOR_REGISTER_REDUCE_BACKWARD(_backward_norm)
+.set_num_inputs(1)
+.set_attr<FResourceRequest>("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+  })
+.set_attr<FCompute>("FCompute<cpu>", L2NormGradCompute<cpu>)
+.set_attr<FComputeEx>("FComputeEx<cpu>", L2NormGradComputeEx<cpu>);
+
 
 }  // namespace op
 }  // namespace mxnet
diff --git a/src/operator/tensor/broadcast_reduce_op_value.cu b/src/operator/tensor/broadcast_reduce_op_value.cu
index 5fd7cbfc89a..dfff359bde8 100644
--- a/src/operator/tensor/broadcast_reduce_op_value.cu
+++ b/src/operator/tensor/broadcast_reduce_op_value.cu
@@ -26,6 +26,28 @@
 
 namespace mxnet {
 namespace op {
+
+template<>
+void L2NormComputeEx<gpu>(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const std::vector<NDArray>& inputs,
+                          const std::vector<OpReqType>& req,
+                          const std::vector<NDArray>& outputs) {
+  CHECK_EQ(inputs.size(), 1U);
+  CHECK_EQ(outputs.size(), 1U);
+  CHECK_EQ(req.size(), 1U);
+  mshadow::Stream<gpu>* s = ctx.get_stream<gpu>();
+  const ReduceAxesParam& param = nnvm::get<ReduceAxesParam>(attrs.parsed);
+  const NDArrayStorageType in_stype = inputs[0].storage_type();
+  // CSR and RowSparse only works on the entire array.
+  if ((in_stype == kCSRStorage || in_stype == kRowSparseStorage)
+      && param.axis.ndim() == 0) {
+    L2NormComputeSparseImpl(s, inputs[0], req[0], outputs[0].data());
+  } else {
+    LogUnimplementedOp(attrs, ctx, inputs, req, outputs);
+  }
+}
+
 NNVM_REGISTER_OP(sum)
 .set_attr<FCompute>("FCompute<gpu>", ReduceAxesCompute<gpu, mshadow::red::sum>);
 
diff --git a/src/operator/tensor/matrix_op-inl.h b/src/operator/tensor/matrix_op-inl.h
index 38ddf2c2d3f..f32473d64bc 100644
--- a/src/operator/tensor/matrix_op-inl.h
+++ b/src/operator/tensor/matrix_op-inl.h
@@ -1433,7 +1433,7 @@ void RepeatOpBackward(const nnvm::NodeAttrs& attrs,
   std::vector<TBlob> newInputs = {iblob};
 
   ReduceAxesComputeImpl<xpu, mshadow::red::sum, false>(
-      attrs, ctx, newInputs, req, newOutputs, rshapes.first);
+      ctx, newInputs, req, newOutputs, rshapes.first);
 }
 
 struct TileParam : public dmlc::Parameter<TileParam> {
@@ -1610,7 +1610,7 @@ void TileOpBackward(const nnvm::NodeAttrs& attrs,
   std::vector<TBlob> newInputs = {iblob};
 
   ReduceAxesComputeImpl<xpu, mshadow::red::sum, false>(
-      attrs, ctx, newInputs, req, newOutputs, rshapes.first);
+      ctx, newInputs, req, newOutputs, rshapes.first);
 }
 
 struct ReverseParam : public dmlc::Parameter<ReverseParam> {
diff --git a/tests/python/unittest/test_ndarray.py b/tests/python/unittest/test_ndarray.py
index c9790f8570f..2d4cb5f6bef 100644
--- a/tests/python/unittest/test_ndarray.py
+++ b/tests/python/unittest/test_ndarray.py
@@ -942,7 +942,7 @@ def check_fluent_regular(func, kwargs, shape=(5, 17, 1), equal_nan=False):
     check_fluent_regular('pad', {'mode': 'constant', 'pad_width': (0,0,0,0,3,0,0,4)}, shape=(5, 17, 2, 3))
     check_fluent_regular('reshape_like', {'rhs': mx.nd.ones((30, 17))}, shape=(5, 17, 2, 3))
 
-    for func in ['sum', 'nansum', 'prod', 'nanprod', 'mean', 'max', 'min']:
+    for func in ['sum', 'nansum', 'prod', 'nanprod', 'mean', 'max', 'min', 'norm']:
         check_fluent_regular(func, {'axis': (1, 2)})
 
     check_fluent_regular('reshape', {'shape': (17, 1, 5)})
@@ -1166,6 +1166,38 @@ def test_ndarray_astype():
     y = x.astype(np.int32, copy=False)
     assert (id(x) == id(y))
 
+
+@with_seed()
+def test_norm(ctx=default_context()):
+    np_arr = np.random.uniform(size=(3, 3, 3, 3))
+    mx_arr = mx.nd.array(np_arr, ctx=ctx)
+    arr1 = np.linalg.norm(np_arr, keepdims=False)
+    arr2 = mx.nd.norm(mx_arr, keepdims=False)
+    print(arr1)
+    print(arr2.asnumpy())
+    mx.test_utils.assert_almost_equal(arr1, arr2.asnumpy()[0])
+
+    for i in range(4):
+        arr1 = np.linalg.norm(np_arr, axis=i, keepdims=False)
+        arr2 = mx.nd.norm(mx_arr, axis=i, keepdims=False)
+        assert arr1.shape == arr2.shape
+        mx.test_utils.assert_almost_equal(arr1, arr2.asnumpy())
+
+        arr1 = np.linalg.norm(np_arr, axis=i, keepdims=True)
+        arr2 = mx.nd.norm(mx_arr, axis=i, keepdims=True)
+        assert arr1.shape == arr2.shape
+        mx.test_utils.assert_almost_equal(arr1, arr2.asnumpy())
+        if (i < 3):
+            arr1 = np.linalg.norm(np_arr, axis=(i, i+1), keepdims=False)
+            arr2 = mx.nd.norm(mx_arr, axis=(i, i+1), keepdims=False)
+            assert arr1.shape == arr2.shape
+            mx.test_utils.assert_almost_equal(arr1, arr2.asnumpy())
+            arr1 = np.linalg.norm(np_arr, axis=(i, i+1), keepdims=True)
+            arr2 = mx.nd.norm(mx_arr, axis=(i, i+1), keepdims=True)
+            assert arr1.shape == arr2.shape
+            mx.test_utils.assert_almost_equal(arr1, arr2.asnumpy())
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 16fcbbace94..00394496868 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -1737,7 +1737,8 @@ def test_reshape_new(src_shape, shape_args, reverse, dst_shape):
 @with_seed()
 def test_reduce():
     sample_num = 500
-    def test_reduce_inner(numpy_reduce_func, numpy_reduce_grad_func, mx_reduce_sym, nan_prob = 0):
+    def test_reduce_inner(numpy_reduce_func, numpy_reduce_grad_func, mx_reduce_sym, nan_prob = 0,
+                          test_exclude = True):
         for i in range(sample_num):
             # Generate random data that has ndim between 1-7 and all the shape dims between 1-5
             # Insert a NaN with probability equal to nan_prob
@@ -1745,7 +1746,10 @@ def test_reduce_inner(numpy_reduce_func, numpy_reduce_grad_func, mx_reduce_sym,
             shape = np.random.randint(1, 6, size=(ndim,))
             axis_num = np.random.randint(0, ndim, size=1)
             axis_flags = np.random.randint(0, 2, size=ndim)
-            exclude = np.random.randint(0, 2)
+            if test_exclude:
+                exclude = np.random.randint(0, 2)
+            else:
+                exclude = False
             axes = []
             for (axis, flag) in enumerate(axis_flags):
                 if flag:
@@ -1819,6 +1823,10 @@ def test_reduce_inner(numpy_reduce_func, numpy_reduce_grad_func, mx_reduce_sym,
                       lambda outgrad, data, outdata, axis, keepdims, keepdim_shape:
                         outgrad.reshape(keepdim_shape) * (np.equal(data, outdata.reshape(keepdim_shape)).astype(np.float)),
                       mx.symbol.min)
+    test_reduce_inner(lambda data, axis, keepdims:np_reduce(data, axis, keepdims, np.linalg.norm),
+                      lambda outgrad, data, outdata, axis, keepdims, keepdim_shape:
+                        outgrad.reshape(keepdim_shape) * (data / outdata.reshape(keepdim_shape)),
+                      mx.symbol.norm, test_exclude=False)
 
 
 @with_seed()
diff --git a/tests/python/unittest/test_sparse_ndarray.py b/tests/python/unittest/test_sparse_ndarray.py
index 169ed89cb37..25eaf4291c9 100644
--- a/tests/python/unittest/test_sparse_ndarray.py
+++ b/tests/python/unittest/test_sparse_ndarray.py
@@ -843,7 +843,7 @@ def check_fluent_regular(stype, func, kwargs, shape=(5, 17), equal_nan=False):
     check_fluent_regular('csr', 'slice', {'begin': (2, 5), 'end': (4, 7)}, shape=(5, 17))
     check_fluent_regular('row_sparse', 'clip', {'a_min': -0.25, 'a_max': 0.75})
 
-    for func in ['sum', 'mean']:
+    for func in ['sum', 'mean', 'norm']:
         check_fluent_regular('csr', func, {'axis': 0})
 
 
diff --git a/tests/python/unittest/test_symbol.py b/tests/python/unittest/test_symbol.py
index b9554b30858..d81b8cfb4c0 100644
--- a/tests/python/unittest/test_symbol.py
+++ b/tests/python/unittest/test_symbol.py
@@ -209,7 +209,7 @@ def check_fluent_regular(func, kwargs, shape=(5, 17, 1), equal_nan=False):
     check_fluent_regular('pad', {'mode': 'constant', 'pad_width': (0,0,0,0,3,0,0,4)}, shape=(5, 17, 2, 3))
     check_fluent_regular('reshape_like', {'rhs': mx.sym.ones((30, 17))}, shape=(5, 17, 2, 3))
 
-    for func in ['sum', 'nansum', 'prod', 'nanprod', 'mean', 'max', 'min']:
+    for func in ['sum', 'nansum', 'prod', 'nanprod', 'mean', 'max', 'min', 'norm']:
         check_fluent_regular(func, {'axis': (1, 2)})
 
     check_fluent_regular('reshape', {'shape': (17, 1, 5)})
