Re: [devel] [PATCH 1/1] saflogger: correct wait time of saflogger in resilience [#3198]

2020-07-12 Thread Thien Minh Huynh
Hi Vu,

Thanks for your comments.

Best Regards,
ThienHuynh

From: Vu Minh Nguyen 
Sent: Monday, July 13, 2020 10:37 AM
To: Thien Minh Huynh ; Thuan Tran 
; Thang Duc Nguyen 
Cc: opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1/1] saflogger: correct wait time of saflogger in 
resilience [#3198]

Ack with comments.

Regards, Vu


From: Thien Minh Huynh 
mailto:thien.m.hu...@dektech.com.au>>
Sent: Friday, July 10, 2020 8:53 AM
To: Thuan Tran mailto:thuan.t...@dektech.com.au>>; 
Thang Duc Nguyen 
mailto:thang.d.ngu...@dektech.com.au>>; Vu Minh 
Nguyen mailto:vu.m.ngu...@dektech.com.au>>
Cc: 
opensaf-devel@lists.sourceforge.net 
mailto:opensaf-devel@lists.sourceforge.net>>;
 Thien Minh Huynh 
mailto:thien.m.hu...@dektech.com.au>>
Subject: [PATCH 1/1] saflogger: correct wait time of saflogger in resilience 
[#3198]

When system running in resilience, if timeout of saflogger is
configured larger than logResilienceTimeout.The saflogger will
be loop forever.
[Vu] my suggestion:
When log resilient mode is enabled and timeout of saflogger is
configured larger than logResilienceTimeout , the saflogger will
be blocked forever until the underlying filesystem is responsive.

The fix is adding a time tracker between start poll and receive
SA_AIS_ERR_TRY_AGAIN, that to correct the wait time.
---
 src/log/tools/saf_logger.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/log/tools/saf_logger.c b/src/log/tools/saf_logger.c
index e9f7e9b36..ac7f832b3 100644
--- a/src/log/tools/saf_logger.c
+++ b/src/log/tools/saf_logger.c
@@ -149,6 +149,9 @@ static SaAisErrorT write_log_record(SaLogHandleT logHandle,
 struct pollfd fds[1];
 int ret;
 unsigned int wait_time = 0;
+   int64_t poll_timeout = g_timeout * 1000;
+   int64_t start_time = 0;
+   int64_t last_time = 0;
[Vu] Append suffixes of time unit after those time variables. e.g.
 int64_t poll_timeout_ms = g_timeout * 1000;

 i++;

@@ -176,8 +179,13 @@ retry:
 fds[0].fd = (int)selectionObject;
 fds[0].events = POLLIN;

+   poll_timeout -= (last_time - start_time);
+   if (poll_timeout < 0)
+   poll_timeout = 0;
+   start_time = get_current_SaTime() / 100;

+
 poll_retry:
-   ret = poll(fds, 1, g_timeout*1000);
+   ret = poll(fds, 1, poll_timeout);

 if (ret == -1 && errno == EINTR)
 goto poll_retry;
@@ -188,6 +196,10 @@ poll_retry:
 }

 if (ret == 0) {
+   if (poll_timeout < g_timeout * 1000) {
[Vu] 'g_timeout*1000' is calculated twice, consider creating a common variable.
e.g. max_poll_timeout_ms = g_timeout*1000;

+   wait_time += poll_timeout * 1000;
+   goto retry_timeout;
[Vu]
'Go to retry_timeout' is confusing me. How about the if .. else?
e.g.
if (ret == 0 && poll_timeout == g_timeout*1000) {
// poll timeout
} else if (ret == 0) {
// has re-tried sometimes but still gets failed
}

+   }
 fprintf(stderr,
 "poll timeout, message %u was most likely lost\n", i);
 return SA_AIS_ERR_BAD_OPERATION;
@@ -209,7 +221,8 @@ poll_retry:
 if (cb_error == SA_AIS_ERR_TRY_AGAIN &&
 wait_time < g_timeout*ONE_SECOND_TO_NS) {
 usleep(HUNDRED_MS);
-   wait_time += HUNDRED_MS;
+   last_time = get_current_SaTime() / 100;
+   wait_time += (last_time - start_time) * 1000;
[Vu] It may be better to name these constant variables to descriptive names
e.g.:
US_TO_NS = 1000 * 1000;
SECOND_TO_MS = 1000;


 goto retry;
 }

@@ -219,6 +232,7 @@ poll_retry:
 goto retry;
 }

+retry_timeout:
 if (cb_error != SA_AIS_OK) {
 if (wait_time)
 fprintf(stderr, "Waited for %u seconds.\n",
--
2.17.1

___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


Re: [devel] [PATCH 1/1] saflogger: correct wait time of saflogger in resilience [#3198]

2020-07-12 Thread Vu Minh Nguyen
Ack with comments.

Regards, Vu


From: Thien Minh Huynh 
Sent: Friday, July 10, 2020 8:53 AM
To: Thuan Tran ; Thang Duc Nguyen 
; Vu Minh Nguyen 
Cc: opensaf-devel@lists.sourceforge.net ; 
Thien Minh Huynh 
Subject: [PATCH 1/1] saflogger: correct wait time of saflogger in resilience 
[#3198]

When system running in resilience, if timeout of saflogger is
configured larger than logResilienceTimeout.The saflogger will
be loop forever.
[Vu] my suggestion:
When log resilient mode is enabled and timeout of saflogger is
configured larger than logResilienceTimeout , the saflogger will
be blocked forever until the underlying filesystem is responsive.

The fix is adding a time tracker between start poll and receive
SA_AIS_ERR_TRY_AGAIN, that to correct the wait time.
---
 src/log/tools/saf_logger.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/log/tools/saf_logger.c b/src/log/tools/saf_logger.c
index e9f7e9b36..ac7f832b3 100644
--- a/src/log/tools/saf_logger.c
+++ b/src/log/tools/saf_logger.c
@@ -149,6 +149,9 @@ static SaAisErrorT write_log_record(SaLogHandleT logHandle,
 struct pollfd fds[1];
 int ret;
 unsigned int wait_time = 0;
+   int64_t poll_timeout = g_timeout * 1000;
+   int64_t start_time = 0;
+   int64_t last_time = 0;
[Vu] Append suffixes of time unit after those time variables. e.g.
 int64_t poll_timeout_ms = g_timeout * 1000;

 i++;

@@ -176,8 +179,13 @@ retry:
 fds[0].fd = (int)selectionObject;
 fds[0].events = POLLIN;

+   poll_timeout -= (last_time - start_time);
+   if (poll_timeout < 0)
+   poll_timeout = 0;
+   start_time = get_current_SaTime() / 100;

+
 poll_retry:
-   ret = poll(fds, 1, g_timeout*1000);
+   ret = poll(fds, 1, poll_timeout);

 if (ret == -1 && errno == EINTR)
 goto poll_retry;
@@ -188,6 +196,10 @@ poll_retry:
 }

 if (ret == 0) {
+   if (poll_timeout < g_timeout * 1000) {
[Vu] 'g_timeout*1000' is calculated twice, consider creating a common variable.
e.g. max_poll_timeout_ms = g_timeout*1000;

+   wait_time += poll_timeout * 1000;
+   goto retry_timeout;
[Vu]
'Go to retry_timeout' is confusing me. How about the if .. else?
e.g.
if (ret == 0 && poll_timeout == g_timeout*1000) {
// poll timeout
} else if (ret == 0) {
// has re-tried sometimes but still gets failed
}

+   }
 fprintf(stderr,
 "poll timeout, message %u was most likely lost\n", i);
 return SA_AIS_ERR_BAD_OPERATION;
@@ -209,7 +221,8 @@ poll_retry:
 if (cb_error == SA_AIS_ERR_TRY_AGAIN &&
 wait_time < g_timeout*ONE_SECOND_TO_NS) {
 usleep(HUNDRED_MS);
-   wait_time += HUNDRED_MS;
+   last_time = get_current_SaTime() / 100;
+   wait_time += (last_time - start_time) * 1000;
[Vu] It may be better to name these constant variables to descriptive names
e.g.:
US_TO_NS = 1000 * 1000;
SECOND_TO_MS = 1000;


 goto retry;
 }

@@ -219,6 +232,7 @@ poll_retry:
 goto retry;
 }

+retry_timeout:
 if (cb_error != SA_AIS_OK) {
 if (wait_time)
 fprintf(stderr, "Waited for %u seconds.\n",
--
2.17.1


___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel


[devel] [PATCH 1/1] saflogger: correct wait time of saflogger in resilience [#3198]

2020-07-09 Thread thien.m.huynh
When system running in resilience, if timeout of saflogger is
configured larger than logResilienceTimeout.The saflogger will
be loop forever.
The fix is adding a time tracker between start poll and receive
SA_AIS_ERR_TRY_AGAIN, that to correct the wait time.
---
 src/log/tools/saf_logger.c | 18 --
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/log/tools/saf_logger.c b/src/log/tools/saf_logger.c
index e9f7e9b36..ac7f832b3 100644
--- a/src/log/tools/saf_logger.c
+++ b/src/log/tools/saf_logger.c
@@ -149,6 +149,9 @@ static SaAisErrorT write_log_record(SaLogHandleT logHandle,
struct pollfd fds[1];
int ret;
unsigned int wait_time = 0;
+   int64_t poll_timeout = g_timeout * 1000;
+   int64_t start_time = 0;
+   int64_t last_time = 0;
 
i++;
 
@@ -176,8 +179,13 @@ retry:
fds[0].fd = (int)selectionObject;
fds[0].events = POLLIN;
 
+   poll_timeout -= (last_time - start_time);
+   if (poll_timeout < 0)
+   poll_timeout = 0;
+   start_time = get_current_SaTime() / 100;
+
 poll_retry:
-   ret = poll(fds, 1, g_timeout*1000);
+   ret = poll(fds, 1, poll_timeout);
 
if (ret == -1 && errno == EINTR)
goto poll_retry;
@@ -188,6 +196,10 @@ poll_retry:
}
 
if (ret == 0) {
+   if (poll_timeout < g_timeout * 1000) {
+   wait_time += poll_timeout * 1000;
+   goto retry_timeout;
+   }
fprintf(stderr,
"poll timeout, message %u was most likely lost\n", i);
return SA_AIS_ERR_BAD_OPERATION;
@@ -209,7 +221,8 @@ poll_retry:
if (cb_error == SA_AIS_ERR_TRY_AGAIN &&
wait_time < g_timeout*ONE_SECOND_TO_NS) {
usleep(HUNDRED_MS);
-   wait_time += HUNDRED_MS;
+   last_time = get_current_SaTime() / 100;
+   wait_time += (last_time - start_time) * 1000;
goto retry;
}
 
@@ -219,6 +232,7 @@ poll_retry:
goto retry;
}
 
+retry_timeout:
if (cb_error != SA_AIS_OK) {
if (wait_time)
fprintf(stderr, "Waited for %u seconds.\n",
-- 
2.17.1



___
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel