The first patch triggers a shadow warning, which this build promotes to an error:

networking/wget.c: In function 'fread_buffered':
networking/wget.c:575: error: declaration of 'read' shadows a global declaration
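
The problem is a local variable named after the libc function it then
needs to call. A hypothetical reduction of what gcc rejects here (the
first patch itself is not quoted):

#include <unistd.h>     /* declares ssize_t read(int, void *, size_t) */
#include <sys/socket.h>

static size_t fread_buffered(int fd, char *buf, size_t len)
{
	/* "declaration of 'read' shadows a global declaration" */
	size_t read = 0;

	while (read < len) {
		/* read(2) is no longer reachable by name in this scope,
		 * hence recv() here; renaming the local (say, to 'total')
		 * fixes the warning and the inconvenience at once */
		ssize_t n = recv(fd, buf + read, len - read, 0);
		if (n <= 0)
			break;
		read += n;
	}
	return read;
}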

It is also way too large, and it adds to bss:

function                                             old     new   delta
fgets_buffer                                           -    4096   +4096
fgets_trim_sanitize                                  128     621    +493
retrieve_file_data                                   579     775    +196
open_socket                                           49     117     +68
fgets_buffer_len                                       -       4      +4
wget_main                                           2437    2435      -2
set_alarm                                             27       -     -27
------------------------------------------------------------------------------
(add/remove: 2/1 grow/shrink: 3/1 up/down: 4857/-29)         Total: 4828 bytes
   text       data        bss        dec        hex    filename
 979050        485       7296     986831      f0ecf    busybox_old
 979775        485      11400     991660      f21ac    busybox_unstripped
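
The bss growth is fgets_buffer plus fgets_buffer_len from the table
above. As a sketch of why that placement is objectionable, and of the
existing busybox idiom for large buffers (struct globals, ptr_to_globals
and INIT_G() are the libbb mechanisms; wget_buf is quoted from wget.c
itself):

static char fgets_buffer[4096];  /* zero-initialized -> .bss, mapped into
                                  * every busybox process, not just wget */

/* the cure: put it into the run-time allocated globals block instead */
struct globals {
	/* ... */
	char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
};
#define G (*ptr_to_globals)  /* xzalloc'ed once by INIT_G() */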

The second patch was mangled by gmail; please resend it as an attachment.

I played with a version where each try is done in a child process;
see the attached z.diff.

function                                             old     new   delta
download_one_url                                       -    2221   +2221
retrieve_file_data                                   579     602     +23
ftpcmd                                               133     151     +18
get_sanitized_hdr                                    156     162      +6
fgets_trim_sanitize                                  128     131      +3
base64enc                                             46      49      +3
packed_usage                                       33070   33042     -28
wget_main                                           2437     565   -1872
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 5/2 up/down: 2274/-1900)        Total: 374 bytes

I'm not entirely happy with this approach either...
we probably need to rewrite the existing code to get rid of fgets()
and use poll() + read() instead.
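
Roughly this shape, as a sketch of the poll()+read() direction
(timed_read is a made-up name; this is not part of the attached z.diff):
the read timeout moves into a poll() in front of each read(), so no
alarm() handler and no child process are needed just for timing out:

#include <errno.h>
#include <poll.h>
#include <unistd.h>

static ssize_t timed_read(int fd, void *buf, size_t len, unsigned timeout_sec)
{
	struct pollfd pfd;
	int r;

	pfd.fd = fd;
	pfd.events = POLLIN;
	r = poll(&pfd, 1, timeout_sec * 1000); /* >0: readable, 0: timeout, <0: error */
	if (r == 0) {
		errno = ETIMEDOUT;
		return -1;
	}
	if (r < 0)
		return -1;
	return read(fd, buf, len);
}

The current fgets() users (header parsing) would then need a small
line-assembly layer on top of such reads, which is presumably what the
first patch's buffering was for.
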
diff --git a/networking/wget.c b/networking/wget.c
index 3f3d3a0c9..25443f8a5 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -122,16 +122,15 @@
 
 //usage:#define wget_trivial_usage
 //usage:	IF_FEATURE_WGET_LONG_OPTIONS(
-//usage:       "[-c|--continue] [--spider] [-q|--quiet] [-O|--output-document FILE]\n"
-//usage:       "	[-o|--output-file FILE] [--header 'header: value'] [-Y|--proxy on/off]\n"
+//usage:       "[-cqS] [--spider] [-O FILE] [-o FILE] [--header 'header: value']\n"
 /* Since we ignore these opts, we don't show them in --help */
-/* //usage:    "	[--no-check-certificate] [--no-cache] [--passive-ftp] [-t TRIES]" */
+/* //usage:    "	[--no-check-certificate] [--no-cache] [--passive-ftp]" */
 /* //usage:    "	[-nv] [-nc] [-nH] [-np]" */
-//usage:       "	[-P DIR] [-S|--server-response] [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
+//usage:       "	[-Y on/off] [-P DIR] [-U AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC] [-t TRIES]") " URL..."
 //usage:	)
 //usage:	IF_NOT_FEATURE_WGET_LONG_OPTIONS(
-//usage:       "[-cq] [-O FILE] [-o FILE] [-Y on/off] [-P DIR] [-S] [-U AGENT]"
-//usage:			IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
+//usage:       "[-cqS] [-O FILE] [-o FILE] [-Y on/off] [-P DIR] [-U AGENT]"
+//usage:			IF_FEATURE_WGET_TIMEOUT(" [-T SEC] [-t TRIES]") " URL..."
 //usage:	)
 //usage:#define wget_full_usage "\n\n"
 //usage:       "Retrieve files via HTTP or FTP\n"
@@ -145,6 +144,7 @@
 //usage:     "\n	-S    		Show server response"
 //usage:	IF_FEATURE_WGET_TIMEOUT(
 //usage:     "\n	-T SEC		Network read timeout is SEC seconds"
+//usage:     "\n	-t RETRIES	Retry on errors (0:infinite)"
 //usage:	)
 //usage:     "\n	-O FILE		Save to FILE ('-' for stdout)"
 //usage:     "\n	-o FILE		Log messages to FILE"
@@ -239,6 +239,7 @@ struct globals {
 	int log_fd;
 	int o_flags;
 #if ENABLE_FEATURE_WGET_TIMEOUT
+	unsigned retries;
 	unsigned timeout_seconds;
 	smallint die_if_timed_out;
 #endif
@@ -248,7 +249,7 @@ struct globals {
 	 * With 512 byte buffer, it was measured to be
 	 * an order of magnitude slower than with big one.
 	 */
-	char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024] ALIGNED(sizeof(long));
+	char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024] ALIGNED(64);
 } FIX_ALIASING;
 #define G (*ptr_to_globals)
 #define INIT_G() do { \
@@ -388,9 +389,6 @@ static void set_alarm(void)
  * is_ip_address() attempts to verify whether or not a string
  * contains an IPv4 or IPv6 address (vs. an FQDN).  The result
  * of inet_pton() can be used to determine this.
- *
- * TODO add proper error checking when inet_pton() returns -1
- * (some form of system error has occurred, and errno is set)
  */
 static int is_ip_address(const char *string)
 {
@@ -1012,6 +1010,15 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
 		 */
 	}
 
+	/* Draw full bar and free its resources */
+	G.chunked = 0;  /* makes it show 100% even for chunked download */
+	G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
+	progress_meter(PROGRESS_END);
+	if (G.content_len != 0) {
+		bb_perror_msg_and_die("connection closed prematurely");
+		/* GNU wget says "DATE TIME (NN MB/s) - Connection closed at byte NNN. Retrying." */
+	}
+
 	/* If -c failed, we restart from the beginning,
 	 * but we do not truncate file then, we do it only now, at the end.
 	 * This lets user to ^C if his 99% complete 10 GB file download
@@ -1023,10 +1030,6 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
 			ftruncate(G.output_fd, pos);
 	}
 
-	/* Draw full bar and free its resources */
-	G.chunked = 0;  /* makes it show 100% even for chunked download */
-	G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
-	progress_meter(PROGRESS_END);
 	if (!(option_mask32 & WGET_OPT_QUIET)) {
 		if (G.output_fd == 1)
 			fprintf(stderr, "written to stdout\n");
@@ -1385,8 +1388,11 @@ However, in real world it was observed that some web servers
 	free(lsa);
 
 	if (!(option_mask32 & WGET_OPT_SPIDER)) {
-		if (G.output_fd < 0)
+		if (G.output_fd < 0) {
+			IF_FEATURE_WGET_TIMEOUT(xfunc_error_retval = 3;) /* signal to parent that retry is useless */
 			G.output_fd = xopen(G.fname_out, G.o_flags);
+			IF_FEATURE_WGET_TIMEOUT(xfunc_error_retval = 1;) /* restore */
+		}
 		retrieve_file_data(dfp);
 		if (!(option_mask32 & WGET_OPT_OUTNAME)) {
 			xclose(G.output_fd);
@@ -1429,9 +1435,9 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
 		"proxy\0"            Required_argument "Y"
 		"user-agent\0"       Required_argument "U"
 IF_FEATURE_WGET_TIMEOUT(
-		"timeout\0"          Required_argument "T")
-		/* Ignored: */
-IF_DESKTOP(	"tries\0"            Required_argument "t")
+		"timeout\0"          Required_argument "T"
+		"tries\0"            Required_argument "t"
+)
 		"header\0"           Required_argument "\xff"
 		"post-data\0"        Required_argument "\xfe"
 		"spider\0"           No_argument       "\xfd"
@@ -1452,6 +1458,7 @@ IF_DESKTOP(	"no-parent\0"        No_argument       "\xf0")
 # define LONGOPTS
 #endif
 
+	smallint exitcode;
 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
 	llist_t *headers_llist = NULL;
 #endif
@@ -1459,6 +1466,7 @@ IF_DESKTOP(	"no-parent\0"        No_argument       "\xf0")
 	INIT_G();
 
 #if ENABLE_FEATURE_WGET_TIMEOUT
+	G.retries = 20; /* GNU wget defaults */
 	G.timeout_seconds = 900;
 	signal(SIGALRM, alarm_handler);
 #endif
@@ -1466,8 +1474,7 @@ IF_DESKTOP(	"no-parent\0"        No_argument       "\xf0")
 	G.user_agent = "Wget"; /* "User-Agent" header field */
 
 	GETOPT32(argv, "^"
-		"cqSO:o:P:Y:U:T:+"
-		/*ignored:*/ "t:"
+		"cqSO:o:P:Y:U:T:+t:+"
 		/*ignored:*/ "n::"
 		/* wget has exactly four -n<letter> opts, all of which we can ignore:
 		 * -nv --no-verbose: be moderately quiet (-q is full quiet)
@@ -1483,8 +1490,8 @@ IF_DESKTOP(	"no-parent\0"        No_argument       "\xf0")
 		LONGOPTS
 		, &G.fname_out, &G.fname_log, &G.dir_prefix,
 		&G.proxy_flag, &G.user_agent,
-		IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
-		NULL, /* -t RETRIES */
+		IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL), /* -T TMOUT */
+		IF_FEATURE_WGET_TIMEOUT(&G.retries) IF_NOT_FEATURE_WGET_TIMEOUT(NULL), /* -t RETRIES */
 		NULL  /* -n[ARG] */
 		IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
 		IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
@@ -1553,8 +1560,46 @@ IF_DESKTOP(	"no-parent\0"        No_argument       "\xf0")
 		}
 	}
 
-	while (*argv)
-		download_one_url(*argv++);
+	exitcode = EXIT_SUCCESS;
+	while (*argv) {
+		const char *url = *argv++;
+#if !BB_MMU || !ENABLE_FEATURE_WGET_TIMEOUT
+		download_one_url(url); /* dies on errors */
+#else
+		uint32_t sv32 = option_mask32;
+		int cnt = G.retries; /* NB: 0 will result in INT_MAX retries */
+		if (cnt == 1) {
+			download_one_url(url);
+			continue;
+		}
+		while (1) {
+			int status;
+			pid_t pid = xfork();
+			if (pid == 0) {
+				/* child */
+				download_one_url(url);
+				/* we reach this location only on success */
+				fflush_all();
+				_exit(0);
+			}
+			status = wait_for_exitstatus(pid);
+			if (!WIFEXITED(status)) /* SEGV and such? */
+				break;
+			if (WEXITSTATUS(status) == 0)
+				goto good;
+			if (WEXITSTATUS(status) == 3) /* fatal error, do not retry */
+				break;
+			if (--cnt == 0)
+				break;
+			sleep(10); /* --waitretry=10 is default in GNU wget */
+//fixme
+			option_mask32 = (option_mask32 | WGET_OPT_CONTINUE) & ~(uint32_t)WGET_OPT_NO_CHECK_CERT;
+		}
+		exitcode = EXIT_FAILURE;
+ good:
+		option_mask32 = sv32;
+#endif
+	}
 
 	if (G.output_fd >= 0)
 		xclose(G.output_fd);
@@ -1567,5 +1612,5 @@ IF_DESKTOP(	"no-parent\0"        No_argument       "\xf0")
 #endif
 	FINI_G();
 
-	return EXIT_SUCCESS;
+	return exitcode;
 }