Hi Alex,

Yes, the m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC adjustment is used for SaTimeT timeout input. But in our case, the timeout duration is set by CPND_WAIT_TIME, which is calculated based on the checkpoint buffer size. That value is not big enough to be adjusted. I checked all uses of cpnd_tmr_start(), and the same issue does not appear anywhere else.

There may be an issue in cpnd_evt_proc_nd2nd_ckpt_sect_exptmr_req(), where the timeout duration is passed to cpnd_tmr_start() without applying the m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC adjustment. But it seems to be out of scope for this ticket. I will check whether a ticket for it is necessary.

Thank you.


--
Best regards,
Hoa Le

On 03/07/2018 03:34 AM, Alex Jones wrote:

Hi Hoa,


    Ack from me. I was able to reproduce it in UML by setting the value to 512.


    But, did you check the other uses of cpnd_tmr_start() to see if they have the issue?


    It looks like the reason it is correct to remove the m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC adjustment in your case is that the timeout is already set by CPND_WAIT_TIME above. Is that right?


    It looks like the call in cpnd_evt_proc_ckpt_sect_create() or cpnd_evt_proc_nd2nd_ckpt_sect_exptmr_req() might have the same problem. Can you check?


Alex


On 03/05/2018 09:02 PM, Hoa Le wrote:
------------------------------------------------------------------------
NOTICE: This email was received from an EXTERNAL sender
------------------------------------------------------------------------

Hi Alex,

This may be related to the performance of the testing node.
In my testing environment, the test case failed with "SaSizeT large_buffer_size = 10000000;" on one specific node, and on other nodes, the test case failed only when I set the large_buffer_size to 25600000.
Can you help re-test it with a larger buffer size (e.g. 51200000)?

Attached are traces and logs when running the test case without the change in ckptnd.

<143>1 2018-03-06T08:53:56.72482+07:00 SC-1 osafckptnd 350 osafckptnd [meta sequenceId="1029"] 350:ckpt/common/cpsv_evt.c:2839 TR cpnd <<== [1] CPND_EVT_TIME_OUT(type=REPL_RSP_EXPI(3)) from CPD
<143>1 2018-03-06T08:53:56.724844+07:00 SC-1 osafckptnd 350 osafckptnd [meta sequenceId="1030"] 350:ckpt/ckptnd/cpnd_evt.c:4679 >> cpnd_evt_proc_timer_expiry
<143>1 2018-03-06T08:53:56.724867+07:00 SC-1 osafckptnd 350 osafckptnd [meta sequenceId="1031"] 350:ckpt/ckptnd/cpnd_evt.c:4742 TR   Before Calling m_NCS_TMR_DESTROY  tmr->ckpt_id 1  tmr->type 3
<143>1 2018-03-06T08:53:56.724889+07:00 SC-1 osafckptnd 350 osafckptnd [meta sequenceId="1032"] 350:ckpt/ckptnd/cpnd_proc.c:1797 >> cpnd_all_repl_rsp_expiry
<143>1 2018-03-06T08:53:56.72491+07:00 SC-1 osafckptnd 350 osafckptnd [meta sequenceId="1033"] 350:ckpt/ckptnd/cpnd_mds.c:1150 >> cpnd_mds_send_rsp
<143>1 2018-03-06T08:53:56.724932+07:00 SC-1 osafckptnd 350 osafckptnd [meta sequenceId="1034"] 350:ckpt/common/cpsv_evt.c:2830 TR cpnd ==>> CPA_EVT_ND2A_CKPT_DATA_RSP(err=5, type=OVWRITE(4)) to node 0x2010F

Thanks.

--
Best regards,
Hoa Le
On 03/06/2018 07:33 AM, Jones, Alex wrote:

Hi Hoa,


  When I run your new test without the change in ckptnd, it passes. Shouldn't it fail?


Alex


------------------------------------------------------------------------
*From:* Hoa Le <hoa...@dektech.com.au>
*Sent:* Thursday, March 1, 2018 2:45:32 AM
*To:* alex.jo...@genband.com; ravisekhar.ko...@oracle.com
*Cc:* opensaf-devel@lists.sourceforge.net; Hoa Le
*Subject:* [PATCH 1/1] cpnd: Correct duration of cpnd_tmr_start in cpnd_proc_update_remote [#2787]
------------------------------------------------------------------------
NOTICE: This email was received from an EXTERNAL sender
------------------------------------------------------------------------

In the cpnd_proc_update_remote() function, cpnd_tmr_start is invoked with the timer duration parameter adjusted by m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC. This duration is 0 in most cases, which leads to an SA_AIS_ERR_TIMEOUT error
for the checkpoint write action if the checkpoint data is big enough.

This patch corrects the duration of cpnd_tmr_start in cpnd_proc_update_remote
and adds a new test case (ckpttest 20 11) to verify the correction.
---
src/ckpt/apitest/test_cpa.c | 89 +++++++++++++++++++++++++++++++++++++++
src/ckpt/apitest/test_cpsv.h | 1 +
src/ckpt/apitest/test_cpsv_conf.h | 7 +--
src/ckpt/ckptnd/cpnd_proc.c | 4 +-
4 files changed, 95 insertions(+), 6 deletions(-)

diff --git a/src/ckpt/apitest/test_cpa.c b/src/ckpt/apitest/test_cpa.c
index 792eb27..0cc38a4 100644
--- a/src/ckpt/apitest/test_cpa.c
+++ b/src/ckpt/apitest/test_cpa.c
@@ -320,6 +320,8 @@ void fill_testcase_data()
fill_ckpt_attri(&tcd.my_app,
SA_CKPT_CHECKPOINT_COLLOCATED | SA_CKPT_WR_ALL_REPLICAS,
140, SA_TIME_END, 2, 85, 3);
+ fill_ckpt_attri(&tcd.large_buffer_attrs, SA_CKPT_WR_ALL_REPLICAS,
+ 4096, 100, 2, 51200000, 3);

fill_ckpt_name(&tcd.all_replicas_ckpt,
"safCkpt=all_replicas_ckpt,safApp=safCkptService");
@@ -417,6 +419,7 @@ void fill_testcase_data()
SA_TIME_END);
fill_sec_attri(&tcd.section_attr_with_too_long_id,
&tcd.too_long_section_id, SA_TIME_END);
+ fill_sec_attri(&tcd.large_buffer_sec, &tcd.section1, SA_TIME_END);

strcpy(tcd.data1, "This is data1");
strcpy(tcd.data2, "This is data2");
@@ -491,6 +494,8 @@ void fill_testcase_data()
tcd.sec_invalid = 6;

fill_ckpt_name(&tcd.invalidName2, "none");
+ fill_ckpt_name(&tcd.large_buffer_ckpt,
+ "safCkpt=large_dataBuffer_ckpt,safApp=safCkptService");
}

void test_cpsv_cleanup(CPSV_CLEANUP_TC_TYPE tc)
@@ -7196,6 +7201,87 @@ final1:
test_validate(result, TEST_PASS);
}

+void cpsv_it_overwrite_13()
+{
+ SaAisErrorT rc;
+ int result, result1;
+
+ SaSizeT large_buffer_size = 25600000;
+ char *large_buffer;
+
+ printHead("To verify that overwrite writes into a section with large"
+ " dataBuffer");
+
+ result = test_ckptInitialize(CKPT_INIT_SUCCESS_T, TEST_CONFIG_MODE);
+ if (result != TEST_PASS)
+ goto final1;
+
+ rc = saCkptCheckpointOpen(tcd.ckptHandle, &(tcd.large_buffer_ckpt),
+ &(tcd.large_buffer_attrs), SA_CKPT_CHECKPOINT_CREATE |
+ SA_CKPT_CHECKPOINT_WRITE | SA_CKPT_CHECKPOINT_READ,
+ SA_TIME_ONE_SECOND, &tcd.large_buffer_hdl);
+ result = cpsv_test_result(rc, SA_AIS_OK,
+ "Created large_dataBuffer_ckpt with all flags and"
+ " large maxSectionSize", TEST_CONFIG_MODE);
+ if (result == TEST_PASS)
+ m_TEST_CPSV_PRINTF(" Checkpoint Handle: %llu\n",
+ tcd.large_buffer_hdl);
+ else
+ goto final2;
+
+ rc = saCkptSectionCreate(tcd.large_buffer_hdl, &tcd.large_buffer_sec,
+ &tcd.data1, tcd.size);
+ result = cpsv_test_result(rc, SA_AIS_OK, "Created Section id 11",
+ TEST_CONFIG_MODE);
+ if (result != TEST_PASS)
+ goto final3;
+
+ large_buffer = (char *)malloc((large_buffer_size+1)*sizeof(char));
+ if (!large_buffer){
+ m_TEST_CPSV_PRINTF("\nOut of memory\n");
+ result = TEST_FAIL;
+ goto final3;
+ }
+ memset(large_buffer, 'a', large_buffer_size);
+ large_buffer[large_buffer_size] = '\0';
+ rc = saCkptSectionOverwrite(tcd.large_buffer_hdl, &tcd.section1,
+ large_buffer, large_buffer_size);
+ result = cpsv_test_result(rc, SA_AIS_OK,
+ "OverWrite in section 11 with large dataBuffer",
+ TEST_NONCONFIG_MODE);
+ if (rc == SA_AIS_OK)
+ m_TEST_CPSV_PRINTF(" DataSize: %llu\n", large_buffer_size);
+ if (result != TEST_PASS)
+ goto final4;
+
+ rc = saCkptCheckpointRead(tcd.large_buffer_hdl, &tcd.general_read,
+ tcd.nOfE, &tcd.ind);
+ result = cpsv_test_result(rc, SA_AIS_OK,
+ "Read from section 11", TEST_CONFIG_MODE);
+ if (result != TEST_PASS)
+ goto final4;
+
+ if (strncmp(large_buffer, tcd.general_read.dataBuffer,
+ tcd.general_read.readSize) != 0)
+ result = TEST_FAIL;
+
+final4:
+ free(large_buffer);
+final3:
+ rc = saCkptCheckpointUnlink(tcd.ckptHandle, &tcd.large_buffer_ckpt);
+ result1 = cpsv_test_result(rc, SA_AIS_OK,
+ "Unlinked large_dataBuffer_ckpt", TEST_CONFIG_MODE);
+ if (result1 != TEST_PASS){
+ m_TEST_CPSV_PRINTF("\n Unlink failed ckpt not cleanedup\n");
+ result = result1;
+ }
+final2:
+ test_cpsv_cleanup(CPSV_CLEAN_INIT_SUCCESS_T);
+final1:
+ printResult(result);
+ test_validate(result, TEST_PASS);
+}
+
/******** OpenCallback *******/

void cpsv_it_openclbk_01()
@@ -8392,6 +8478,9 @@ __attribute__((constructor)) static void ckpt_cpa_test_constructor(void)
"To verify overwrite when NULL dataBuffer is provided");
test_case_add(20, cpsv_it_overwrite_12,
"To verify overwrite when NULL sectionId is provided");
+ test_case_add(20, cpsv_it_overwrite_13,
+ "To verify that overwrite writes into a section with"
+ " large dataBuffer");

test_suite_add(21, "CKPT OpenCallBack");
test_case_add(
diff --git a/src/ckpt/apitest/test_cpsv.h b/src/ckpt/apitest/test_cpsv.h
index 97c78f2..7acb622 100644
--- a/src/ckpt/apitest/test_cpsv.h
+++ b/src/ckpt/apitest/test_cpsv.h
@@ -919,6 +919,7 @@ void cpsv_it_overwrite_09(void);
void cpsv_it_overwrite_10(void);
void cpsv_it_overwrite_11(void);
void cpsv_it_overwrite_12(void);
+void cpsv_it_overwrite_13(void);
void cpsv_it_openclbk_01(void);
void cpsv_it_openclbk_02(void);
void cpsv_it_syncclbk_01(void);
diff --git a/src/ckpt/apitest/test_cpsv_conf.h b/src/ckpt/apitest/test_cpsv_conf.h
index 68522de..55f268d 100644
--- a/src/ckpt/apitest/test_cpsv_conf.h
+++ b/src/ckpt/apitest/test_cpsv_conf.h
@@ -73,7 +73,7 @@ struct cpsv_testcase_data {
SaCkptCheckpointCreationAttributesT ckpt_all_collocated_replica,
ckpt_weak_collocated_replica, multi_io_replica;
SaCkptCheckpointCreationAttributesT invalid, invalid2, invalid3, invalid4,
- invalid_collocated, my_app;
+ invalid_collocated, my_app, large_buffer_attrs;

SaCkptCheckpointHandleT all_replicas_Createhdl, all_replicas_Writehdl,
all_replicas_Readhdl, all_replicas_create_after_unlink;
@@ -86,7 +86,7 @@ struct cpsv_testcase_data {
SaCkptCheckpointHandleT all_collocated_Readhdl, weak_collocated_Createhdl,
weak_collocated_Writehdl, weak_collocated_Readhdl;
SaCkptCheckpointHandleT multi_io_hdl, uninitckptHandle, testHandle,
- async_all_replicas_hdl, open_clbk_hdl;
+ async_all_replicas_hdl, open_clbk_hdl, large_buffer_hdl;

SaNameT all_replicas_ckpt, active_replica_ckpt, weak_replica_ckpt,
collocated_ckpt, async_all_replicas_ckpt, async_active_replica_ckpt;
@@ -106,6 +106,7 @@ struct cpsv_testcase_data {
special_attr, special_attr2, special_attr3, invalid_attr, multi_attr;
SaCkptSectionCreationAttributesT section_attr_with_long_id;
SaCkptSectionCreationAttributesT section_attr_with_too_long_id;
+ SaCkptSectionCreationAttributesT large_buffer_sec;
char data1[14], data2[14], data3[14];
SaSizeT size, size_zero;

@@ -136,7 +137,7 @@ struct cpsv_testcase_data {
secjunkHandle, badHandle;
SaCkptSectionsChosenT sec_forever, sec_any, exp_leq, exp_geq, sec_corrupt,
sec_invalid;
- SaNameT invalidName2, invalidName;
+ SaNameT invalidName2, invalidName, large_buffer_ckpt;
SaInvocationT open_clbk_invo;
SaInvocationT sync_clbk_invo;
SaAisErrorT open_clbk_err;
diff --git a/src/ckpt/ckptnd/cpnd_proc.c b/src/ckpt/ckptnd/cpnd_proc.c
index 367ef34..c2438c7 100644
--- a/src/ckpt/ckptnd/cpnd_proc.c
+++ b/src/ckpt/ckptnd/cpnd_proc.c
@@ -1489,9 +1489,7 @@ uint32_t cpnd_proc_update_remote(CPND_CB *cb, CPND_CKPT_NODE *cp_node,
all_repl_evt->write_rsp_tmr.write_type =
in_evt->info.ckpt_write.type;
rc = cpnd_tmr_start(
- &all_repl_evt->write_rsp_tmr,
- m_CPSV_CONVERT_SATIME_TEN_MILLI_SEC(
- timeout));
+ &all_repl_evt->write_rsp_tmr, timeout);

while (head != NULL) {
rc = cpnd_mds_msg_send(
--
2.7.4



------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to