This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new c7577e5 added extraction/generation of diagonal and triangonal
matrices to linalg (#14501)
c7577e5 is described below
commit c7577e59841c6289e52d33d4cbba5b2c61859c01
Author: moin <[email protected]>
AuthorDate: Tue Apr 30 18:28:23 2019 +0200
added extraction/generation of diagonal and triangonal matrices to linalg
(#14501)
---
docs/api/python/ndarray/linalg.md | 6 +-
docs/api/python/symbol/linalg.md | 6 +-
src/operator/tensor/la_op-inl.h | 94 ++++++++++++++
src/operator/tensor/la_op.cc | 231 +++++++++++++++++++++++++++++++++
src/operator/tensor/la_op.cu | 24 ++++
src/operator/tensor/la_op.h | 68 ++++++++++
tests/python/unittest/test_operator.py | 45 +++++++
7 files changed, 472 insertions(+), 2 deletions(-)
diff --git a/docs/api/python/ndarray/linalg.md
b/docs/api/python/ndarray/linalg.md
index 41436c3..b73d968 100644
--- a/docs/api/python/ndarray/linalg.md
+++ b/docs/api/python/ndarray/linalg.md
@@ -51,10 +51,14 @@ In the rest of this document, we list routines provided by
the `ndarray.linalg`
potri
trmm
trsm
- sumlogdiag
syrk
gelqf
syevd
+ sumlogdiag
+ extractdiag
+ makediag
+ extracttrian
+ maketrian
```
## API Reference
diff --git a/docs/api/python/symbol/linalg.md b/docs/api/python/symbol/linalg.md
index f1891e2..5b467b5 100644
--- a/docs/api/python/symbol/linalg.md
+++ b/docs/api/python/symbol/linalg.md
@@ -51,10 +51,14 @@ In the rest of this document, we list routines provided by
the `symbol.linalg` p
potri
trmm
trsm
- sumlogdiag
syrk
gelqf
syevd
+ sumlogdiag
+ extractdiag
+ makediag
+ extracttrian
+ maketrian
```
## API Reference
diff --git a/src/operator/tensor/la_op-inl.h b/src/operator/tensor/la_op-inl.h
index e89a082..bda8137 100644
--- a/src/operator/tensor/la_op-inl.h
+++ b/src/operator/tensor/la_op-inl.h
@@ -229,6 +229,100 @@ struct sumlogdiag {
}
};
+// Element-wise kernel that moves a single diagonal between a batch of n x n
+// matrices (addressed as (matrix*n+row)*n+col) and a packed buffer that stores
+// the n-|k| entries of that diagonal for each matrix, one matrix after the other.
+//   forward == true : B[i] is read from the diagonal position in A (extraction).
+//   forward == false: the diagonal position in B is written from A[i]; entries
+//                     of B off the diagonal are not touched by this kernel.
+template<bool forward>
+struct CopyDiag {
+ // i: flat index into the packed diagonal buffer.
+ // k: diagonal offset; k > 0 shifts the column, k < 0 shifts the row.
+ // n: number of rows (= columns) of each matrix.
+ template<typename DType>
+ MSHADOW_XINLINE static void Map(int i, int k, int n, DType* A, DType* B) {
+ // Index of the matrix from which the diagonal should be extracted.
+ const int matrix(i / (n-abs(k)));
+ // Index of the diagonal element that should be extracted.
+ const int index(i % (n-abs(k)));
+ // row/col that must be looked up.
+ const int row(index-(k < 0 ? k : 0)), col(index+(k > 0 ? k :0));
+ if (forward) {
+ B[i] = A[(matrix*n+row)*n+col];
+ } else {
+ B[(matrix*n+row)*n+col] = A[i];
+ }
+ }
+};
+
+// Launchers for the CopyDiag kernel. The overload is selected by tensor rank:
+// (3-d A, 2-d B) extracts a diagonal, (2-d A, 3-d B) builds matrices from one.
+// In this patch the same struct is registered both as the forward op of
+// extractdiag/makediag and as the backward op of the respective other operator.
+struct copydiag {
+ // Extracts diagonal from matrix.
+ // The launch count is B.MSize() (the packed output); A.size(1) is passed as
+ // the matrix dimension n and param.offset as the diagonal offset k.
+ template<typename xpu, typename DType>
+ static void op(const Tensor<xpu, 3, DType>& A, const Tensor<xpu, 2, DType>&
B,
+ const OpContext& ctx, const nnvm::NodeAttrs& attrs) {
+ using namespace mxnet_op;
+ Stream<xpu> *s = ctx.get_stream<xpu>();
+ const LaDiagParam& param = nnvm::get<LaDiagParam>(attrs.parsed);
+ Kernel<CopyDiag<true>, xpu>::Launch(s, B.MSize(), param.offset, A.size(1),
A.dptr_, B.dptr_);
+ }
+ // Sets diagonal in matrix.
+ // B is zero-filled first because CopyDiag<false> only writes the entries on
+ // the selected diagonal. The launch count is A.MSize() (the packed input).
+ template<typename xpu, typename DType>
+ static void op(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 3, DType>&
B,
+ const OpContext& ctx, const nnvm::NodeAttrs& attrs) {
+ using namespace mxnet_op;
+ Stream<xpu> *s = ctx.get_stream<xpu>();
+ const LaDiagParam& param = nnvm::get<LaDiagParam>(attrs.parsed);
+ Kernel<set_zero, xpu>::Launch(s, B.MSize(), B.dptr_);
+ Kernel<CopyDiag<false>, xpu>::Launch(s, A.MSize(), param.offset,
B.size(1), A.dptr_, B.dptr_);
+ }
+};
+
+// Element-wise kernel that moves a triangle between a batch of n x n matrices
+// and a packed buffer holding m = (n-|k|)*(n-|k|+1)/2 entries per matrix.
+//   forward == true : matrix element A[i] is stored into its packed slot in B.
+//   forward == false: matrix element B[i] is loaded from its packed slot in A.
+// Indices i that fall outside the selected triangle are skipped, so in the
+// non-forward case those entries of B keep whatever value they had before.
+template<bool forward>
+struct CopyTrian {
+ // i: flat index into the batch of matrices; decoded as matrix = i/(n*n),
+ //    row = (i/n)%n, col = i%n.
+ // lower: only consulted when k == 0; false selects the upper triangle.
+ // k: offset; k > 0 selects an upper triangle, k < 0 a lower triangle.
+ // n: number of rows (= columns) of each matrix.
+ template<typename DType>
+ MSHADOW_XINLINE static void Map(int i, bool lower, int k, int n, DType* A,
DType* B) {
+ // Matrix that this index belongs to.
+ const int matrix(i/(n*n));
+ // Row/Col that this index represents.
+ int row((i/n)%n), col(i%n);
+ if ((k > 0) || ((k == 0) && !lower)) {
+ // When working on upper triangle we switch to transposed coordinates for indexing.
+ int tmp(row);
+ row = col;
+ col = tmp;
+ }
+ // Actual row inside the lower triangular matrix after offset adjustment.
+ // A negative result can never satisfy row >= col below, so such elements are skipped.
+ row -= abs(k);
+ if (row >= col) {
+ // Index in the 1-dimensional array that holds the values of the triangle.
+ // Rows of the (possibly transposed) lower triangle are stored one after the other.
+ const int index((row*(row+1))/2+col);
+ // Total number of entries in the triangle.
+ const int m(((n-abs(k))*(n-abs(k)+1))/2);
+ if (forward) {
+ B[m*matrix+index] = A[i];
+ } else {
+ B[i] = A[m*matrix+index];
+ }
+ }
+ }
+};
+
+// Launchers for the CopyTrian kernel. The overload is selected by tensor rank:
+// (3-d A, 2-d B) packs a triangle, (2-d A, 3-d B) unpacks it into matrices.
+// In this patch the same struct is registered both as the forward op of
+// extracttrian/maketrian and as the backward op of the respective other operator.
+struct copytrian {
+ // Extracts triangle from matrix.
+ // The launch count is A.MSize(), i.e. the kernel is driven by the matrix
+ // elements; A.size(1) is passed as the matrix dimension n.
+ template<typename xpu, typename DType>
+ static void op(const Tensor<xpu, 3, DType>& A, const Tensor<xpu, 2, DType>&
B,
+ const OpContext& ctx, const nnvm::NodeAttrs& attrs) {
+ using namespace mxnet_op;
+ Stream<xpu> *s = ctx.get_stream<xpu>();
+ const LaTrianParam& param = nnvm::get<LaTrianParam>(attrs.parsed);
+ Kernel<CopyTrian<true>, xpu>::Launch(s, A.MSize(), param.lower,
param.offset,
+ A.size(1), A.dptr_, B.dptr_);
+ }
+ // Sets triangle in matrix.
+ // B is zero-filled first because CopyTrian<false> skips every element that
+ // lies outside the selected triangle. The launch count is B.MSize().
+ template<typename xpu, typename DType>
+ static void op(const Tensor<xpu, 2, DType>& A, const Tensor<xpu, 3, DType>&
B,
+ const OpContext& ctx, const nnvm::NodeAttrs& attrs) {
+ using namespace mxnet_op;
+ Stream<xpu> *s = ctx.get_stream<xpu>();
+ const LaTrianParam& param = nnvm::get<LaTrianParam>(attrs.parsed);
+ Kernel<set_zero, xpu>::Launch(s, B.MSize(), B.dptr_);
+ Kernel<CopyTrian<false>, xpu>::Launch(s, B.MSize(), param.lower,
param.offset,
+ B.size(1), A.dptr_, B.dptr_);
+ }
+};
+
// B = syrk(A)
struct syrk {
template<typename xpu, typename DType>
diff --git a/src/operator/tensor/la_op.cc b/src/operator/tensor/la_op.cc
index 12cea91..d6e64c4 100644
--- a/src/operator/tensor/la_op.cc
+++ b/src/operator/tensor/la_op.cc
@@ -33,6 +33,8 @@ DMLC_REGISTER_PARAMETER(LaMatrixMacParam);
DMLC_REGISTER_PARAMETER(LaMatrixMultParam);
DMLC_REGISTER_PARAMETER(LaCholeskyParam);
DMLC_REGISTER_PARAMETER(LaTriangMatrixMultParam);
+DMLC_REGISTER_PARAMETER(LaDiagParam);
+DMLC_REGISTER_PARAMETER(LaTrianParam);
DMLC_REGISTER_PARAMETER(LaSyrkParam);
NNVM_REGISTER_OP(_linalg_gemm)
@@ -461,6 +463,235 @@ NNVM_REGISTER_OP(_backward_linalg_sumlogdiag)
.set_attr<nnvm::TIsBackward>("TIsBackward", true)
.set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 2, 2, 2, 1,
sumlogdiag_backward>);
+NNVM_REGISTER_OP(_linalg_extractdiag)
+.add_alias("linalg_extractdiag")
+.describe(R"code(Extracts the diagonal entries of a square matrix.
+Input is a tensor *A* of dimension *n >= 2*.
+
+If *n=2*, then *A* represents a single square matrix whose diagonal elements
get extracted as a 1-dimensional tensor.
+
+If *n>2*, then *A* represents a batch of square matrices on the trailing two
dimensions. The extracted diagonals are returned as an *n-1*-dimensional tensor.
+
+.. note:: The operator supports float32 and float64 data types only.
+
+Examples::
+
+ // Single matrix diagonal extraction
+ A = [[1.0, 2.0],
+ [3.0, 4.0]]
+
+ extractdiag(A) = [1.0, 4.0]
+
+ extractdiag(A, 1) = [2.0]
+
+ // Batch matrix diagonal extraction
+ A = [[[1.0, 2.0],
+ [3.0, 4.0]],
+ [[5.0, 6.0],
+ [7.0, 8.0]]]
+
+ extractdiag(A) = [[1.0, 4.0],
+ [5.0, 8.0]]
+)code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<LaDiagParam>)
+.set_attr<nnvm::FListInputNames>("FListInputNames", [](const NodeAttrs& attrs)
+ { return std::vector<std::string>{"A"}; } )
+.set_attr<mxnet::FInferShape>("FInferShape", LaDiagTrianShape<true, true>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute<cpu>", LaOpForward<cpu, 2, 1, 1, 1, copydiag>)
+.set_attr<nnvm::FGradient>("FGradient",
ElemwiseGradUseNone{"_backward_linalg_extractdiag"})
+.add_argument("A", "NDArray-or-Symbol", "Tensor of square matrices")
+.add_arguments(LaDiagParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_backward_linalg_extractdiag)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<LaDiagParam>)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs)
+ { return std::vector<ResourceRequest>{ResourceRequest::kTempSpace}; })
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 1, 2, 1, 1, copydiag>);
+
+NNVM_REGISTER_OP(_linalg_makediag)
+.add_alias("linalg_makediag")
+.describe(R"code(Constructs a square matrix with the input as diagonal.
+Input is a tensor *A* of dimension *n >= 1*.
+
+If *n=1*, then *A* represents the diagonal entries of a single square matrix.
This matrix will be returned as a 2-dimensional tensor.
+If *n>1*, then *A* represents a batch of diagonals of square matrices. The
batch of diagonal matrices will be returned as an *n+1*-dimensional tensor.
+
+.. note:: The operator supports float32 and float64 data types only.
+
+Examples::
+
+ // Single diagonal matrix construction
+ A = [1.0, 2.0]
+
+ makediag(A) = [[1.0, 0.0],
+ [0.0, 2.0]]
+
+ makediag(A, 1) = [[0.0, 1.0, 0.0],
+ [0.0, 0.0, 2.0],
+ [0.0, 0.0, 0.0]]
+
+ // Batch diagonal matrix construction
+ A = [[1.0, 2.0],
+ [3.0, 4.0]]
+
+ makediag(A) = [[[1.0, 0.0],
+ [0.0, 2.0]],
+ [[3.0, 0.0],
+ [0.0, 4.0]]]
+)code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<LaDiagParam>)
+.set_attr<nnvm::FListInputNames>("FListInputNames", [](const NodeAttrs& attrs)
+ { return std::vector<std::string>{"A"}; } )
+.set_attr<mxnet::FInferShape>("FInferShape", LaDiagTrianShape<true, false>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute<cpu>", LaOpForward<cpu, 1, 2, 1, 1, copydiag>)
+.set_attr<nnvm::FGradient>("FGradient",
ElemwiseGradUseNone{"_backward_linalg_makediag"})
+.add_argument("A", "NDArray-or-Symbol", "Tensor of diagonal entries")
+.add_arguments(LaDiagParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_backward_linalg_makediag)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<LaDiagParam>)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs)
+ { return std::vector<ResourceRequest>{ResourceRequest::kTempSpace}; })
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 2, 1, 1, 1, copydiag>);
+
+NNVM_REGISTER_OP(_linalg_extracttrian)
+.add_alias("linalg_extracttrian")
+.describe(R"code(Extracts a triangular sub-matrix from a square matrix.
+Input is a tensor *A* of dimension *n >= 2*.
+
+If *n=2*, then *A* represents a single square matrix from which a triangular
sub-matrix is extracted as a 1-dimensional tensor.
+
+If *n>2*, then *A* represents a batch of square matrices on the trailing two
dimensions. The extracted triangular sub-matrices are returned as an
*n-1*-dimensional tensor.
+
+The *offset* and *lower* parameters determine the triangle to be extracted:
+
+- When *offset = 0* either the lower or upper triangle with respect to the
main diagonal is extracted depending on the value of parameter *lower*.
+- When *offset = k > 0* the upper triangle with respect to the k-th diagonal
above the main diagonal is extracted.
+- When *offset = k < 0* the lower triangle with respect to the k-th diagonal
below the main diagonal is extracted.
+
+.. note:: The operator supports float32 and float64 data types only.
+
+Examples::
+
+ // Single triangular extraction
+ A = [[1.0, 2.0],
+ [3.0, 4.0]]
+
+ extracttrian(A) = [1.0, 3.0, 4.0]
+ extracttrian(A, lower=False) = [1.0, 2.0, 4.0]
+ extracttrian(A, 1) = [2.0]
+ extracttrian(A, -1) = [3.0]
+
+ // Batch triangular extraction
+ A = [[[1.0, 2.0],
+ [3.0, 4.0]],
+ [[5.0, 6.0],
+ [7.0, 8.0]]]
+
+ extracttrian(A) = [[1.0, 3.0, 4.0],
+ [5.0, 7.0, 8.0]]
+)code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<LaTrianParam>)
+.set_attr<nnvm::FListInputNames>("FListInputNames", [](const NodeAttrs& attrs)
+ { return std::vector<std::string>{"A"}; } )
+.set_attr<mxnet::FInferShape>("FInferShape", LaDiagTrianShape<false, true>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute<cpu>", LaOpForward<cpu, 2, 1, 1, 1, copytrian>)
+.set_attr<nnvm::FGradient>("FGradient",
ElemwiseGradUseNone{"_backward_linalg_extracttrian"})
+.add_argument("A", "NDArray-or-Symbol", "Tensor of square matrices")
+.add_arguments(LaTrianParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_backward_linalg_extracttrian)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<LaTrianParam>)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs)
+ { return std::vector<ResourceRequest>{ResourceRequest::kTempSpace}; })
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 1, 2, 1, 1, copytrian>);
+
+NNVM_REGISTER_OP(_linalg_maketrian)
+.add_alias("linalg_maketrian")
+.describe(R"code(Constructs a square matrix with the input representing a
specific triangular sub-matrix.
+This is basically the inverse of *linalg.extracttrian*. Input is a tensor *A*
of dimension *n >= 1*.
+
+If *n=1*, then *A* represents the entries of a triangular matrix which is
lower triangular if *offset<0* or *offset=0*, *lower=true*. The resulting
matrix is derived by first constructing the square
+matrix with the entries outside the triangle set to zero and then adding
*offset*-times an additional
+diagonal with zero entries to the square matrix.
+
+If *n>1*, then *A* represents a batch of triangular sub-matrices. The batch of
corresponding square matrices is returned as an *n+1*-dimensional tensor.
+
+.. note:: The operator supports float32 and float64 data types only.
+
+Examples::
+
+ // Single matrix construction
+ A = [1.0, 2.0, 3.0]
+
+ maketrian(A) = [[1.0, 0.0],
+ [2.0, 3.0]]
+
+ maketrian(A, lower=false) = [[1.0, 2.0],
+ [0.0, 3.0]]
+
+ maketrian(A, offset=1) = [[0.0, 1.0, 2.0],
+ [0.0, 0.0, 3.0],
+ [0.0, 0.0, 0.0]]
+ maketrian(A, offset=-1) = [[0.0, 0.0, 0.0],
+ [1.0, 0.0, 0.0],
+ [2.0, 3.0, 0.0]]
+
+ // Batch matrix construction
+ A = [[1.0, 2.0, 3.0],
+ [4.0, 5.0, 6.0]]
+
+ maketrian(A) = [[[1.0, 0.0],
+ [2.0, 3.0]],
+ [[4.0, 0.0],
+ [5.0, 6.0]]]
+
+ maketrian(A, offset=1) = [[[0.0, 1.0, 2.0],
+ [0.0, 0.0, 3.0],
+ [0.0, 0.0, 0.0]],
+ [[0.0, 4.0, 5.0],
+ [0.0, 0.0, 6.0],
+ [0.0, 0.0, 0.0]]]
+)code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<LaTrianParam>)
+.set_attr<nnvm::FListInputNames>("FListInputNames", [](const NodeAttrs& attrs)
+ { return std::vector<std::string>{"A"}; } )
+.set_attr<mxnet::FInferShape>("FInferShape", LaDiagTrianShape<false, false>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute<cpu>", LaOpForward<cpu, 1, 2, 1, 1, copytrian>)
+.set_attr<nnvm::FGradient>("FGradient",
ElemwiseGradUseNone{"_backward_linalg_maketrian"})
+.add_argument("A", "NDArray-or-Symbol", "Tensor of triangular matrices stored
as vectors")
+.add_arguments(LaTrianParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_backward_linalg_maketrian)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<LaTrianParam>)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs)
+ { return std::vector<ResourceRequest>{ResourceRequest::kTempSpace}; })
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<FCompute>("FCompute<cpu>", LaOpBackward<cpu, 2, 1, 1, 1, copytrian>);
+
NNVM_REGISTER_OP(_linalg_syrk)
.add_alias("linalg_syrk")
.describe(R"code(Multiplication of matrix with its transpose.
diff --git a/src/operator/tensor/la_op.cu b/src/operator/tensor/la_op.cu
index 29a4846..ec310fe 100644
--- a/src/operator/tensor/la_op.cu
+++ b/src/operator/tensor/la_op.cu
@@ -63,6 +63,30 @@ NNVM_REGISTER_OP(_linalg_sumlogdiag)
NNVM_REGISTER_OP(_backward_linalg_sumlogdiag)
.set_attr<FCompute>("FCompute<gpu>", LaOpBackward<gpu, 2, 2, 2, 1,
sumlogdiag_backward>);
+NNVM_REGISTER_OP(_linalg_extractdiag)
+.set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 2, 1, 1, 1, copydiag>);
+
+NNVM_REGISTER_OP(_backward_linalg_extractdiag)
+.set_attr<FCompute>("FCompute<gpu>", LaOpBackward<gpu, 1, 2, 1, 1, copydiag>);
+
+NNVM_REGISTER_OP(_linalg_makediag)
+.set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 1, 2, 1, 1, copydiag>);
+
+NNVM_REGISTER_OP(_backward_linalg_makediag)
+.set_attr<FCompute>("FCompute<gpu>", LaOpBackward<gpu, 2, 1, 1, 1, copydiag>);
+
+NNVM_REGISTER_OP(_linalg_extracttrian)
+.set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 2, 1, 1, 1, copytrian>);
+
+NNVM_REGISTER_OP(_backward_linalg_extracttrian)
+.set_attr<FCompute>("FCompute<gpu>", LaOpBackward<gpu, 1, 2, 1, 1, copytrian>);
+
+NNVM_REGISTER_OP(_linalg_maketrian)
+.set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 1, 2, 1, 1, copytrian>);
+
+NNVM_REGISTER_OP(_backward_linalg_maketrian)
+.set_attr<FCompute>("FCompute<gpu>", LaOpBackward<gpu, 2, 1, 1, 1, copytrian>);
+
NNVM_REGISTER_OP(_linalg_potri)
.set_attr<FCompute>("FCompute<gpu>", LaOpForward<gpu, 2, 2, 1, 1, potri>);
diff --git a/src/operator/tensor/la_op.h b/src/operator/tensor/la_op.h
index db4607f..3b36f7c 100644
--- a/src/operator/tensor/la_op.h
+++ b/src/operator/tensor/la_op.h
@@ -129,6 +129,33 @@ struct LaSyrkParam : public dmlc::Parameter<LaSyrkParam> {
}
};
+// Parameters for diag extraction/creation.
+// Parsed by the extractdiag/makediag operators and their backward ops.
+struct LaDiagParam : public dmlc::Parameter<LaDiagParam> {
+ // Signed distance of the selected diagonal from the main diagonal (default 0).
+ int offset;
+ DMLC_DECLARE_PARAMETER(LaDiagParam) {
+ DMLC_DECLARE_FIELD(offset)
+ .set_default(0)
+ .describe("Offset of the diagonal versus the main diagonal. 0
corresponds to the main "
+ "diagonal, a negative/positive value to diagonals below/above
the main diagonal.");
+ }
+};
+
+// Parameters for trian extraction/creation.
+// Parsed by the extracttrian/maketrian operators and their backward ops.
+struct LaTrianParam : public dmlc::Parameter<LaTrianParam> {
+ // Signed distance of the bounding diagonal from the main diagonal (default 0).
+ int offset;
+ // Selects lower (true, default) or upper (false) triangle; per the field
+ // description below this only matters when offset is 0.
+ bool lower;
+ DMLC_DECLARE_PARAMETER(LaTrianParam) {
+ DMLC_DECLARE_FIELD(offset)
+ .set_default(0)
+ .describe("Offset of the diagonal versus the main diagonal. 0
corresponds to the main "
+ "diagonal, a negative/positive value to diagonals below/above
the main diagonal.");
+ DMLC_DECLARE_FIELD(lower)
+ .set_default(true)
+ .describe("Refer to the lower triangular matrix if lower=true, refer to
the upper otherwise."
+ " Only relevant when offset=0");
+ }
+};
+
// Common function for shape inference for matrix mult and matrix mac.
inline bool LaMatrixMultMacOpShape(const nnvm::NodeAttrs& attrs,
mxnet::ShapeVector* in_attrs,
@@ -262,6 +289,47 @@ inline bool LaReduceShape(const nnvm::NodeAttrs& attrs,
return true;
}
+// Shape inference shared by linalg extractdiag/makediag/extracttrian/maketrian.
+//   diag    - true for the diagonal operators (offset read from LaDiagParam),
+//             false for the triangle operators (offset read from LaTrianParam).
+//   extract - true when the two trailing (matrix) axes collapse into one packed
+//             axis, false when the trailing packed axis expands into a matrix.
+// Leading axes are copied through unchanged. Returns false while the input
+// shape is still unknown (inference runs in forward direction only) and fails
+// a CHECK when the input shape is not valid for the requested operation.
+template<bool diag, bool extract>
+inline bool LaDiagTrianShape(const nnvm::NodeAttrs& attrs,
+                             mxnet::ShapeVector* in_attrs,
+                             mxnet::ShapeVector* out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1);
+  CHECK_EQ(out_attrs->size(), 1);
+  const int ndim((*in_attrs)[0].ndim());
+  // Only infer in forward direction
+  if (ndim == 0) {
+    return false;
+  }
+  const int offset = (diag ? nnvm::get<LaDiagParam>(attrs.parsed).offset
+                           : nnvm::get<LaTrianParam>(attrs.parsed).offset);
+  std::vector<int> oshape(extract ? ndim-1 : ndim+1);
+  // Leading (batch) axes carry over; the trailing axes are filled in below.
+  for (int i = 0; i < ndim-1; ++i) {
+    oshape[i] = (*in_attrs)[0][i];
+  }
+  if (extract) {
+    CHECK_GE(ndim, 2)
+      << "Input operand must be a tensor of matrices";
+    CHECK_EQ((*in_attrs)[0][ndim-2], (*in_attrs)[0][ndim-1])
+      << "Input operand must be a tensor of square matrices";
+    // Size of each square matrix; this (not the tensor rank) is what the
+    // offset has to be compared against and what the error message reports.
+    const int dim((*in_attrs)[0][ndim-1]);
+    const int n(dim-abs(offset));
+    CHECK_GT(n, 0)
+      << "Illegal offset " << offset << " for diag/trian extraction of matrix with dimension "
+      << dim;
+    oshape[ndim-2] = (diag ? n : (n*(n+1))/2);
+  } else if (diag) {
+    oshape[ndim] = oshape[ndim-1] = (*in_attrs)[0][ndim-1]+abs(offset);
+  } else {
+    const int n((*in_attrs)[0][ndim-1]);
+    // Solve m*(m+1)/2 == n for m. The radicand is formed in double so that
+    // 8*n+1 cannot overflow int for a very long last axis.
+    const int m(std::floor(0.5+(std::sqrt(8.0*n+1.0)-1.0)*0.5));
+    CHECK_EQ((m*(m+1))/2, n)
+      << "Input tensor of maketrian has an invalid dimension for the last axis.";
+    oshape[ndim] = oshape[ndim-1] = m+abs(offset);
+  }
+  mxnet::TShape tshape(oshape.begin(), oshape.end());
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0, tshape);
+  return true;
+}
+
// Shape inference function for linalg_syrk
inline bool LaSyrkShape(const nnvm::NodeAttrs& attrs,
mxnet::ShapeVector* in_attrs,
diff --git a/tests/python/unittest/test_operator.py
b/tests/python/unittest/test_operator.py
index ddcc881..e8bfaba 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -6296,6 +6296,51 @@ def test_laop_4():
#print('float32')
check_fw(test_syevd, [a_np], [u_np, l_np], np.float32)
+def test_laop_5():
+ # tests for diagonal and triangular matrix extraction and generation
+ data = mx.symbol.Variable('data')
+ # test complete range of small matrices to cover corner cases
+ for n in range(1, 10):
+ # test batched and non-batched processing
+ for b in range(3):
+ shape = (n, n) if b == 0 else (b, n, n)
+ data_in = np.random.uniform(1, 10, shape)
+ # test all legal offsets of the diagonal
+ for offs in range(1-n, n):
+ # test extraction of diagonal
+ test_diag = mx.sym.linalg.extractdiag(data, offset=offs)
+ res_diag = np.diagonal(data_in, offset=offs) if b==0 else
np.diagonal(data_in, axis1=1, axis2=2, offset=offs)
+ check_symbolic_forward(test_diag, [data_in], [res_diag])
+ check_numeric_gradient(test_diag, [data_in])
+ # test generation of diagonal matrix
+ test_diag2 = mx.sym.linalg.makediag(data, offset=offs)
+ res_diag2 = None
+ if b == 0:
+ res_diag2 = np.diagflat(res_diag, k=offs)
+ else:
+ for i in range(b):
+ res = np.reshape(np.diagflat(res_diag[i], k=offs), (1,
n, n))
+ res_diag2 = res if res_diag2 is None else
np.concatenate((res_diag2, res), axis=0)
+ check_symbolic_forward(test_diag2, [res_diag], [res_diag2])
+ check_numeric_gradient(test_diag2, [res_diag])
+ # check both settings for parameter "lower" in case of zero
offset
+ lower_vals = [True] if offs != 0 else [True, False]
+ for lower in lower_vals:
+ # test extraction of triangle by doing a full roundtrip as
the intermediate extracted
+ # triangle has different orderings than numpy.
+ test_trian = mx.sym.linalg.extracttrian(data, offset=offs,
lower=lower)
+ test_trian = mx.sym.linalg.maketrian(test_trian,
offset=offs, lower=lower)
+ extracts_lower = (offs < 0) or ((offs == 0) and lower)
+ res_trian = None
+ if b == 0:
+ res_trian = np.tril(data_in, offs) if extracts_lower
else np.triu(data_in, offs)
+ else:
+ for i in range(b):
+ res = np.tril(data_in[i], offs) if extracts_lower
else np.triu(data_in[i], offs)
+ res = np.reshape(res, (1, n, n))
+ res_trian = res if res_trian is None else
np.concatenate((res_trian, res), axis=0)
+ check_symbolic_forward(test_trian, [data_in], [res_trian])
+ check_numeric_gradient(test_trian, [data_in])
@with_seed()
def test_stack():