mozga-intel commented on a change in pull request #20533:
URL: https://github.com/apache/incubator-mxnet/pull/20533#discussion_r693905361
##########
File path: src/operator/nn/mkldnn/mkldnn_base-inl.h
##########
@@ -305,17 +305,28 @@ inline static mkldnn::memory::desc GetMemDesc(const
NDArray& arr, int dtype = -1
return mkldnn::memory::desc{dims, get_mkldnn_type(dtype),
mkldnn::memory::format_tag::any};
}
-inline static mkldnn::memory::desc GetFCWeightDesc(const NDArray& arr, int
dtype = -1) {
+inline static bool ChooseBRGEMMImpl(mkldnn::memory::dims weight_dims, size_t
batch_size) {
+ // Conditions based on measurement results done on CLX8280
+ // https://github.com/apache/incubator-mxnet/pull/20533
+ return weight_dims[0] % 64 == 0 && weight_dims[1] % 64 == 0 &&
weight_dims[0] >= 1024 &&
+ weight_dims[1] >= 1024 && batch_size >= 2 << 13;
Review comment:
2 << 13: What is the advantage of using this specific notation? Written as
a bit shift, the threshold is hard to read.
##########
File path: src/operator/nn/mkldnn/mkldnn_base-inl.h
##########
@@ -305,17 +305,28 @@ inline static mkldnn::memory::desc GetMemDesc(const
NDArray& arr, int dtype = -1
return mkldnn::memory::desc{dims, get_mkldnn_type(dtype),
mkldnn::memory::format_tag::any};
}
-inline static mkldnn::memory::desc GetFCWeightDesc(const NDArray& arr, int
dtype = -1) {
+inline static bool ChooseBRGEMMImpl(mkldnn::memory::dims weight_dims, size_t
batch_size) {
+ // Conditions based on measurement results done on CLX8280
+ // https://github.com/apache/incubator-mxnet/pull/20533
+ return weight_dims[0] % 64 == 0 && weight_dims[1] % 64 == 0 &&
weight_dims[0] >= 1024 &&
Review comment:
It would be better to check weight_dims[0] >= 1024 first, to avoid the '%' operations.
##########
File path: src/operator/nn/mkldnn/mkldnn_base-inl.h
##########
@@ -305,17 +305,28 @@ inline static mkldnn::memory::desc GetMemDesc(const
NDArray& arr, int dtype = -1
return mkldnn::memory::desc{dims, get_mkldnn_type(dtype),
mkldnn::memory::format_tag::any};
}
-inline static mkldnn::memory::desc GetFCWeightDesc(const NDArray& arr, int
dtype = -1) {
+inline static bool ChooseBRGEMMImpl(mkldnn::memory::dims weight_dims, size_t
batch_size) {
Review comment:
How about passing weight_dims by reference? It looks like dims is a
vector.
##########
File path: src/operator/nn/mkldnn/mkldnn_base-inl.h
##########
@@ -305,17 +305,28 @@ inline static mkldnn::memory::desc GetMemDesc(const
NDArray& arr, int dtype = -1
return mkldnn::memory::desc{dims, get_mkldnn_type(dtype),
mkldnn::memory::format_tag::any};
}
-inline static mkldnn::memory::desc GetFCWeightDesc(const NDArray& arr, int
dtype = -1) {
+inline static bool ChooseBRGEMMImpl(mkldnn::memory::dims weight_dims, size_t
batch_size) {
+ // Conditions based on measurement results done on CLX8280
+ // https://github.com/apache/incubator-mxnet/pull/20533
+ return weight_dims[0] % 64 == 0 && weight_dims[1] % 64 == 0 &&
weight_dims[0] >= 1024 &&
Review comment:
Those comprehensive tests have shown us a lot of things. Could you please
tell me what acceptance criteria are being used here? In short: how
were the boundaries determined?
##########
File path: docs/static_site/src/pages/api/faq/env_var.md
##########
@@ -298,9 +298,9 @@ If ctypes is used, it must be
`mxnet._ctypes.ndarray.NDArrayBase`.
- Values: Int ```(default=-1)```
- Flag to set num of elements that MKLDNN cache can hold. Default is -1
which means cache size is unbounded. Should only be set if your model has
variable input shapes, as cache size may grow unbounded. The number represents
the number of items in the cache and is proportional to the number of layers
that use MKLDNN and different input shape.
-* MXNET_MKLDNN_DISABLE_BRGEMM_FC
- - Values: 0, 1 ```(default=1)```
- - Flag which disables BRGEMM kernels in FullyConnected executed with MKLDNN
support - Should only be set to 0 if your model has constant input shapes or
FullyConnected is calculated with large tensors. Supported on machines with
AVX512-VNNI.
+* MXNET_MKLDNN_FORCE_FC_AB_FORMAT
+ - Values: 0, 1 ```(default=0)```
+ - If set to true, FullyConnected will use only AB format for weights, thus
MXNet won't use BRGEMM implementation of FC on machines with AVX512-VNNI
support.
Review comment:
thus -> Otherwise
##########
File path: src/operator/nn/mkldnn/mkldnn_base-inl.h
##########
@@ -305,17 +305,28 @@ inline static mkldnn::memory::desc GetMemDesc(const
NDArray& arr, int dtype = -1
return mkldnn::memory::desc{dims, get_mkldnn_type(dtype),
mkldnn::memory::format_tag::any};
}
-inline static mkldnn::memory::desc GetFCWeightDesc(const NDArray& arr, int
dtype = -1) {
+inline static bool ChooseBRGEMMImpl(mkldnn::memory::dims weight_dims, size_t
batch_size) {
+ // Conditions based on measurement results done on CLX8280
+ // https://github.com/apache/incubator-mxnet/pull/20533
+ return weight_dims[0] % 64 == 0 && weight_dims[1] % 64 == 0 &&
weight_dims[0] >= 1024 &&
+ weight_dims[1] >= 1024 && batch_size >= 2 << 13;
+}
+
+inline static mkldnn::memory::desc GetFCWeightDesc(const NDArray& arr,
+ size_t batch_size,
+ int dtype = -1) {
Review comment:
What does the value (-1) mean?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]