mozga-intel commented on a change in pull request #20338:
URL: https://github.com/apache/incubator-mxnet/pull/20338#discussion_r649842161
##########
File path: src/c_api/c_api.cc
##########
@@ -1573,6 +1578,42 @@ int MXRandomSeedContext(int seed, int dev_type, int
dev_id) {
API_END();
}
+int MXSetFlushDenorms(bool value) {
+ API_BEGIN();
+ // FTZ only applies to SSE and AVX instructions.
+ #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) &&
_M_IX86_FP >= 1)
+ auto is_dmz_flag_available = []() {
Review comment:
To be very picky here, I'd prefer to see an explicit bool type instead of
auto. Even if the lambda's body is a single return statement, the return type
can be defined explicitly (e.g. `[]() -> bool { ... }`).
##########
File path: src/c_api/c_api.cc
##########
@@ -1573,6 +1578,42 @@ int MXRandomSeedContext(int seed, int dev_type, int
dev_id) {
API_END();
}
+int MXSetFlushDenorms(bool value) {
+ API_BEGIN();
+ // FTZ only applies to SSE and AVX instructions.
+ #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) &&
_M_IX86_FP >= 1)
+ auto is_dmz_flag_available = []() {
+ // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1
+ // "Checking for the DAZ Flag in the MXCSR Register"
+ constexpr unsigned int mxcsr_mask_offset = 28;
+ constexpr unsigned int dmz_flag_offset = 5;
+ constexpr unsigned int fxsave_req_bytes = 512;
+
+ char* fxsave_area_ptr =
reinterpret_cast<char*>(malloc(fxsave_req_bytes));
+ memset(fxsave_area_ptr, 0, fxsave_req_bytes); // fill memory with 0
+ _fxsave(fxsave_area_ptr);
+
+ char* mxcsr_mask_ptr = fxsave_area_ptr + mxcsr_mask_offset;
+ uint32_t mxcsr_mask = *(reinterpret_cast<uint32_t*>((mxcsr_mask_ptr)));
+ bool dmz_flag = (mxcsr_mask >> dmz_flag_offset) & 0x1;
+ free(fxsave_area_ptr);
+ return dmz_flag;
+ };
+
+ const unsigned int DMZ_STATE = value ? _MM_DENORMALS_ZERO_ON :
_MM_DENORMALS_ZERO_OFF;
+ const unsigned int FTZ_STATE = value ? _MM_FLUSH_ZERO_ON :
_MM_FLUSH_ZERO_OFF;
+
+ _MM_SET_FLUSH_ZERO_MODE(FTZ_STATE);
+ // If the DAZ flag is not supported, then it is a reserved bit and
attempting to write a 1
Review comment:
It's worth saying that the DAZ flag is supported when the sixth bit (bit 5)
is set.
##########
File path: src/c_api/c_api.cc
##########
@@ -1573,6 +1578,42 @@ int MXRandomSeedContext(int seed, int dev_type, int
dev_id) {
API_END();
}
+int MXSetFlushDenorms(bool value) {
+ API_BEGIN();
+ // FTZ only applies to SSE and AVX instructions.
+ #if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) &&
_M_IX86_FP >= 1)
+ auto is_dmz_flag_available = []() {
+ // Intel 64 and IA-32 Architectures Software Developer’s Manual: Vol. 1
+ // "Checking for the DAZ Flag in the MXCSR Register"
+ constexpr unsigned int mxcsr_mask_offset = 28;
+ constexpr unsigned int dmz_flag_offset = 5;
+ constexpr unsigned int fxsave_req_bytes = 512;
+
+ char* fxsave_area_ptr =
reinterpret_cast<char*>(malloc(fxsave_req_bytes));
+ memset(fxsave_area_ptr, 0, fxsave_req_bytes); // fill memory with 0
+ _fxsave(fxsave_area_ptr);
+
+ char* mxcsr_mask_ptr = fxsave_area_ptr + mxcsr_mask_offset;
+ uint32_t mxcsr_mask = *(reinterpret_cast<uint32_t*>((mxcsr_mask_ptr)));
+ bool dmz_flag = (mxcsr_mask >> dmz_flag_offset) & 0x1;
+ free(fxsave_area_ptr);
+ return dmz_flag;
+ };
+
+ const unsigned int DMZ_STATE = value ? _MM_DENORMALS_ZERO_ON :
_MM_DENORMALS_ZERO_OFF;
+ const unsigned int FTZ_STATE = value ? _MM_FLUSH_ZERO_ON :
_MM_FLUSH_ZERO_OFF;
+
+ _MM_SET_FLUSH_ZERO_MODE(FTZ_STATE);
+ // If the DAZ flag is not supported, then it is a reserved bit and
attempting to write a 1
Review comment:
It's worth saying that the DAZ flag is supported when the sixth bit (bit 5)
of MXCSR_MASK is set.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org