On 06/10/17 02:28 AM, Tom Robinson wrote:

On 06/10/17 08:58, Jean-Louis Martineau wrote:
On 05/10/17 05:48 PM, Tom Robinson wrote:
Hi,

Splitting this out of the other thread for clarity.

What does this error mean?

FAILED [shm_ring cancelled]

A patch that fixes this issue has already been posted; try it, or upgrade to amanda-3.5.
The patch is attached.


Also getting:

FAILED [02-00098]
FAILED [06-00229]
FAILED [12-00205]

These report as 'FAILED' but further on into the report they appear to have completed without issue. What do the error codes signify (e.g. FAILED [02-00098] etc.)?
As you found, these backups were retried and succeeded, so you can ignore these messages.
I committed the attached patch to fix the issue.

Jean-Louis
This is unexpected.
Can you post the amdump.<DATESTAMP> and log.<DATESTAMP>.0 files, and the dumper debug files?


Attached as requested.
This message is the property of CARBONITE, INC. and may contain confidential or 
privileged information.
If this message has been delivered to you by mistake, then do not copy or 
deliver this message to anyone.  Instead, destroy it and notify me by reply 
e-mail
diff --git a/server-src/driver.c b/server-src/driver.c
index f025916..b833617 100644
--- a/server-src/driver.c
+++ b/server-src/driver.c
@@ -1799,6 +1799,7 @@ start_some_dumps(
 	    job->dumper = dumper;
 	    dumper->job = job;
 	    sp->timestamp = now;
+	    g_free(sp->try_again_message);
 	    amfree(sp->disk->dataport_list);
 
 	    dumper->busy = 1;		/* dumper is now busy */
@@ -1848,6 +1849,7 @@ start_some_dumps(
 	    dumper->job = job;
 
 	    sp->timestamp = now;
+	    g_free(sp->try_again_message);
 	    amfree(sp->disk->dataport_list);
 
 	    dumper->busy = 1;		/* dumper is now busy */
@@ -3531,7 +3533,9 @@ handle_dumper_result(
 	     * Requeue this disk, and fall through to the FAILED
 	     * case for cleanup.
 	     */
-	    if (sp->dump_attempted >= dp->retry_dump-1) {
+	    g_free(sp->try_again_message);
+	    sp->try_again_message = g_strdup(result_argv[2]);
+	    if (sp->dump_attempted >= dp->retry_dump) {
 		char *qname = quote_string(dp->name);
 		char *qerr = quote_string(result_argv[2]);
 		log_add(L_FAIL, _("%s %s %s %d [too many dumper retry: %s]"),
@@ -3648,7 +3652,10 @@ handle_dumper_result(
 	    /* send the dumper result to the chunker */
 	    if (job->chunker) {
 		if (cmd == TRYAGAIN) {
-		    chunker_cmd(job->chunker, ABORT, sp, "dumper TRYAGAIN");
+		    char *abort_message = g_strdup_printf("dumper TRYAGAIN: %s",
+						sp->try_again_message);
+		    chunker_cmd(job->chunker, ABORT, sp, abort_message);
+		    g_free(abort_message);
 		    pending_aborts++;
 		} else if (job->chunker->sendresult) {
 		    if (cmd == DONE) {
@@ -3666,7 +3673,10 @@ handle_dumper_result(
 		    wtaper = job->wtaper;
 		    taper = wtaper->taper;
 		    if (cmd == TRYAGAIN) {
-			taper_cmd(taper, wtaper, ABORT, sp, NULL, 0, "dumper TRYAGAIN");
+			char *abort_message = g_strdup_printf("dumper TRYAGAIN: %s",
+							sp->try_again_message);
+			taper_cmd(taper, wtaper, ABORT, sp, NULL, 0, abort_message);
+			g_free(abort_message);
 		    } else if (cmd == DONE && wtaper->sendresult) {
 			taper_cmd(taper, wtaper, DONE, sp, NULL, 0, NULL);
 			wtaper->sendresult = FALSE;
diff --git a/server-src/driverio.h b/server-src/driverio.h
index d9f75ca..6ea410f 100644
--- a/server-src/driverio.h
+++ b/server-src/driverio.h
@@ -129,6 +129,7 @@ typedef struct sched_s {
     char *src_pool;
     char *src_label;
     int   src_fileno;
+    char *try_again_message;
 } sched_t;
 
 typedef struct schedlist_s {
diff --git a/server-src/dumper.c b/server-src/dumper.c
index b21e7e8..024557c 100644
--- a/server-src/dumper.c
+++ b/server-src/dumper.c
@@ -825,8 +825,7 @@ main(
 		q = quote_string(errstr);
 		putresult(rc == 2? FAILED : TRYAGAIN, "%s %s\n",
 		    handle, q);
-		if (rc == 2)
-		    log_add(L_FAIL, "%s %s %s %d [%s]", hostname, qdiskname,
+		log_add(L_FAIL, "%s %s %s %d [%s]", hostname, qdiskname,
 			dumper_timestamp, level, errstr);
 		amfree(q);
 	    } else {
diff --git a/installcheck/Makefile.am b/installcheck/Makefile.am
index 0346cec..06fa098 100644
--- a/installcheck/Makefile.am
+++ b/installcheck/Makefile.am
@@ -103,6 +103,7 @@ full_tests = \
 	failure-client-encrypt-end \
 	failure-client-encrypt-not-found \
 	failure-client-encrypt-no-exec \
+	failure-dumper-try-again \
 	failure-server-custom-compress \
 	failure-server-custom-compress-end \
 	failure-server-custom-compress-not-found \
diff --git a/server-src/driver.c b/server-src/driver.c
index f025916..b833617 100644
--- a/server-src/driver.c
+++ b/server-src/driver.c
@@ -1799,6 +1799,7 @@ start_some_dumps(
 	    job->dumper = dumper;
 	    dumper->job = job;
 	    sp->timestamp = now;
+	    g_free(sp->try_again_message);
 	    amfree(sp->disk->dataport_list);
 
 	    dumper->busy = 1;		/* dumper is now busy */
@@ -1848,6 +1849,7 @@ start_some_dumps(
 	    dumper->job = job;
 
 	    sp->timestamp = now;
+	    g_free(sp->try_again_message);
 	    amfree(sp->disk->dataport_list);
 
 	    dumper->busy = 1;		/* dumper is now busy */
@@ -3531,7 +3533,9 @@ handle_dumper_result(
 	     * Requeue this disk, and fall through to the FAILED
 	     * case for cleanup.
 	     */
-	    if (sp->dump_attempted >= dp->retry_dump-1) {
+	    g_free(sp->try_again_message);
+	    sp->try_again_message = g_strdup(result_argv[2]);
+	    if (sp->dump_attempted >= dp->retry_dump) {
 		char *qname = quote_string(dp->name);
 		char *qerr = quote_string(result_argv[2]);
 		log_add(L_FAIL, _("%s %s %s %d [too many dumper retry: %s]"),
@@ -3648,7 +3652,10 @@ handle_dumper_result(
 	    /* send the dumper result to the chunker */
 	    if (job->chunker) {
 		if (cmd == TRYAGAIN) {
-		    chunker_cmd(job->chunker, ABORT, sp, "dumper TRYAGAIN");
+		    char *abort_message = g_strdup_printf("dumper TRYAGAIN: %s",
+						sp->try_again_message);
+		    chunker_cmd(job->chunker, ABORT, sp, abort_message);
+		    g_free(abort_message);
 		    pending_aborts++;
 		} else if (job->chunker->sendresult) {
 		    if (cmd == DONE) {
@@ -3666,7 +3673,10 @@ handle_dumper_result(
 		    wtaper = job->wtaper;
 		    taper = wtaper->taper;
 		    if (cmd == TRYAGAIN) {
-			taper_cmd(taper, wtaper, ABORT, sp, NULL, 0, "dumper TRYAGAIN");
+			char *abort_message = g_strdup_printf("dumper TRYAGAIN: %s",
+							sp->try_again_message);
+			taper_cmd(taper, wtaper, ABORT, sp, NULL, 0, abort_message);
+			g_free(abort_message);
 		    } else if (cmd == DONE && wtaper->sendresult) {
 			taper_cmd(taper, wtaper, DONE, sp, NULL, 0, NULL);
 			wtaper->sendresult = FALSE;
diff --git a/server-src/driverio.h b/server-src/driverio.h
index d9f75ca..6ea410f 100644
--- a/server-src/driverio.h
+++ b/server-src/driverio.h
@@ -129,6 +129,7 @@ typedef struct sched_s {
     char *src_pool;
     char *src_label;
     int   src_fileno;
+    char *try_again_message;
 } sched_t;
 
 typedef struct schedlist_s {
diff --git a/server-src/dumper.c b/server-src/dumper.c
index b21e7e8..e293280 100644
--- a/server-src/dumper.c
+++ b/server-src/dumper.c
@@ -46,6 +46,10 @@
 #include "timestamp.h"
 #include "amxml.h"
 
+#ifdef FAILURE_CODE
+static int dumper_try_again=0;
+#endif
+
 #define dumper_debug(i, ...) do {	\
 	if ((i) <= debug_dumper) {	\
 	    g_debug(__VA_ARGS__);	\
@@ -814,7 +818,16 @@ main(
 			      ssh_keys,
 			      auth,
 			      options);
-
+#ifdef FAILURE_CODE
+	    if (dumper_try_again==0) {
+		char *A = getenv("DUMPER_TRY_AGAIN");
+		if (A) {
+		    rc=1;
+		    errstr=g_strdup("DUMPER-TRY-AGAIN");
+		    dumper_try_again=1;
+		}
+	    }
+#endif
 	    if (rc == 3) {
 		log_add(L_RETRY, "%s %s %s %d delay %d level %d message %s",
 			hostname, qdiskname, dumper_timestamp, level,
@@ -825,8 +838,7 @@ main(
 		q = quote_string(errstr);
 		putresult(rc == 2? FAILED : TRYAGAIN, "%s %s\n",
 		    handle, q);
-		if (rc == 2)
-		    log_add(L_FAIL, "%s %s %s %d [%s]", hostname, qdiskname,
+		log_add(L_FAIL, "%s %s %s %d [%s]", hostname, qdiskname,
 			dumper_timestamp, level, errstr);
 		amfree(q);
 	    } else {

Reply via email to