Mauro Tortonesi <[EMAIL PROTECTED]> writes: > there is another possible solution. reordering the addresses > returned by getaddrinfo so that IPv4 addresses are at the beginning > of the list. [...] > i think that the best thing we can do right now is reordering the > addresses returned by getaddrinfo.
I have now implemented this. I also made sure that the order of the addresses with the same family is left undisturbed, and that the sorting can be turned off, or even made to favor IPv6 like nc6 does. The reordering is stable, which means the order of addresses with the same family is respected. This does introduce a new obscure command-line option, but I think preventing the "why is Wget connecting to IPv6 hosts" mails is worth it. Thanks for the suggestion. > http://cvs.deepspace6.net/view/nc6/src/netsupport.c > > of course nc6 is an IPv6 oriented tool, so it tries to use IPv6 > addresses first, but for wget the opposite behaviour makes sense to > me. if we choose to do that, we might want to have a > --dont-reorder-v4-first switch to change the default behaviour This patch allows: --prefer-family=ipv4 connect to IPv4 first, default --prefer-family=ipv6 connect to IPv6 first --prefer-family=none respect order returned by getaddrinfo --- doc/ChangeLog: 2005-04-24 Hrvoje Niksic <[EMAIL PROTECTED]> * wget.texi (Download Options): Document --prefer-family. --- src/ChangeLog 2005-04-24 Hrvoje Niksic <[EMAIL PROTECTED]> * host.c (cmp_prefer_ipv4): New function. (cmp_prefer_ipv6): New function. (lookup_host): Use the appropriate comparator according to opt.prefer_family. * init.c: New option prefer_family. * host.c (is_valid_ipv6_address): Spell NS_* constants in lower case to avoid clash with system headers. (lookup_host): Reorder the addresses so that IPv4 ones come first. * utils.c (stable_sort): New function. Index: doc/wget.texi =================================================================== RCS file: /pack/anoncvs/wget/doc/wget.texi,v retrieving revision 1.112 diff -u -r1.112 wget.texi --- doc/wget.texi 2005/04/24 10:54:58 1.112 +++ doc/wget.texi 2005/04/24 19:36:32 @@ -900,6 +900,27 @@ @samp{--inet6-only} and @samp{--inet4-only} may be specified in the same command. Neither option is available in Wget compiled without IPv6 support. 
+ [EMAIL PROTECTED] --prefer-family=IPv4/IPv6/none +When given a choice of several addresses, connect to the addresses +with specified address family first. IPv4 addresses are preferred by +default. + +This avoids spurious errors and connect attempts when accessing hosts +that resolve to both IPv6 and IPv4 addresses from IPv4 networks. For +example, @samp{www.kame.net} resolves to [EMAIL PROTECTED]:200:0:8002:203:47ff:fea5:3085} and to [EMAIL PROTECTED] When the preferred family is @code{IPv4}, the +IPv4 address is used first; when the preferred family is @code{IPv6}, +the IPv6 address is used first; if the specified value is @code{none}, +the address order returned by DNS is used without change. + +Unlike @samp{-4} and @samp{-6}, this option doesn't forbid access to +any address family, it only changes the @emph{order} in which the +addresses are accessed. Also note that the reordering performed by +this option is @dfn{stable}---it doesn't affect order of addresses of +the same family. That is, the relative order of all IPv4 addresses +and of all IPv6 addresses remains intact in all cases. @end table @node Directory Options @@ -2536,6 +2557,12 @@ Use POST as the method for all HTTP requests and send the contents of @var{file} in the request body. The same as @samp{--post-file}. [EMAIL PROTECTED] prefer_family = IPv4/IPv6/none +When given a choice of several addresses, connect to the addresses +with specified address family first. IPv4 addresses are preferred by +default. The same as @samp{--prefer-family}, which see for a detailed +discussion of why this is useful. + @item progress = @var{string} Set the type of the progress indicator. Legal types are ``dot'' and ``bar''. @@ -3548,6 +3575,12 @@ Dave Turner, Gisle Vanem, Russell Vincent, [EMAIL PROTECTED] [EMAIL PROTECTED] Vrba, [EMAIL PROTECTED] iftex [EMAIL PROTECTED] +Zeljko Vrba, [EMAIL PROTECTED] ifnottex Charles G Waldman, Douglas E. 
Wegscheid, YAMAZAKI Makoto, Index: src/host.c =================================================================== RCS file: /pack/anoncvs/wget/src/host.c,v retrieving revision 1.82 diff -u -r1.82 host.c --- src/host.c 2005/04/07 20:43:26 1.82 +++ src/host.c 2005/04/24 19:36:47 @@ -239,6 +239,32 @@ return al; } +#define IS_IPV4(addr) (((const ip_address *) addr)->type == IPV4_ADDRESS) + +/* Compare two IP addresses by type, giving preference to the IPv4 + address (sorting it first). In other words, return -1 if ADDR1 is + IPv4 and ADDR2 is IPv6, +1 if ADDR1 is IPv6 and ADDR2 is IPv4, and + 0 otherwise. + + This is intended to be used as the comparator arg to a qsort-like + sorting function, which is why it accepts generic pointers. */ + +static int +cmp_prefer_ipv4 (const void *addr1, const void *addr2) +{ + return !IS_IPV4 (addr1) - !IS_IPV4 (addr2); +} + +#define IS_IPV6(addr) (((const ip_address *) addr)->type == IPV6_ADDRESS) + +/* Like the above, but give preference to the IPv6 address. */ + +static int +cmp_prefer_ipv6 (const void *addr1, const void *addr2) +{ + return !IS_IPV6 (addr1) - !IS_IPV6 (addr2); +} + #else /* not ENABLE_IPV6 */ /* Create an address_list from a NULL-terminated vector of IPv4 @@ -473,10 +499,11 @@ int is_valid_ipv6_address (const char *str, const char *end) { + /* Use lower-case for these to avoid clash with system headers. */ enum { - NS_INADDRSZ = 4, - NS_IN6ADDRSZ = 16, - NS_INT16SZ = 2 + ns_inaddrsz = 4, + ns_in6addrsz = 16, + ns_int16sz = 2 }; const char *curtok; @@ -531,19 +558,19 @@ } else if (str == end) return 0; - if (tp > NS_IN6ADDRSZ - NS_INT16SZ) + if (tp > ns_in6addrsz - ns_int16sz) return 0; - tp += NS_INT16SZ; + tp += ns_int16sz; saw_xdigit = 0; val = 0; continue; } /* if ch is a dot ... */ - if (ch == '.' && (tp <= NS_IN6ADDRSZ - NS_INADDRSZ) + if (ch == '.' 
&& (tp <= ns_in6addrsz - ns_inaddrsz) && is_valid_ipv4_address (curtok, end) == 1) { - tp += NS_INADDRSZ; + tp += ns_inaddrsz; saw_xdigit = 0; break; } @@ -553,19 +580,19 @@ if (saw_xdigit == 1) { - if (tp > NS_IN6ADDRSZ - NS_INT16SZ) + if (tp > ns_in6addrsz - ns_int16sz) return 0; - tp += NS_INT16SZ; + tp += ns_int16sz; } if (colonp != NULL) { - if (tp == NS_IN6ADDRSZ) + if (tp == ns_in6addrsz) return 0; - tp = NS_IN6ADDRSZ; + tp = ns_in6addrsz; } - if (tp != NS_IN6ADDRSZ) + if (tp != ns_in6addrsz) return 0; return 1; @@ -640,15 +667,20 @@ } } -/* Look up HOST in DNS and return a list of IP addresses. The - addresses in the list are in the same order in which - gethostbyname/getaddrinfo returned them. +/* Look up HOST in DNS and return a list of IP addresses. This function caches its result so that, if the same host is passed the second time, the addresses are returned without DNS lookup. (Use LH_REFRESH to force lookup, or set opt.dns_cache to 0 to globally disable caching.) + The order of the returned addresses is affected by the setting of + opt.prefer_family: if it is set to prefer_ipv4, IPv4 addresses are + placed at the beginning; if it is prefer_ipv6, IPv6 ones are placed + at the beginning; otherwise, the order is left intact. The + relative order of addresses with the same family is left + undisturbed in either case. + FLAGS can be a combination of: LH_SILENT - don't print the "resolving ... done" messages. LH_BIND - resolve addresses for use with bind, which under @@ -778,6 +810,14 @@ _("failed: No IPv4/IPv6 addresses for host.\n")); return NULL; } + + /* Reorder addresses so that IPv4 ones (or IPv6 ones, as per + --prefer-family) come first. Sorting is stable so the order of + the addresses with the same family is undisturbed. */ + if (al->count > 1 && opt.prefer_family != prefer_none) + stable_sort (al->addresses, al->count, sizeof (ip_address), + opt.prefer_family == prefer_ipv4 + ? 
cmp_prefer_ipv4 : cmp_prefer_ipv6); } #else /* not ENABLE_IPV6 */ { Index: src/init.c =================================================================== RCS file: /pack/anoncvs/wget/src/init.c,v retrieving revision 1.100 diff -u -r1.100 init.c --- src/init.c 2005/04/07 21:37:21 1.100 +++ src/init.c 2005/04/24 19:36:49 @@ -88,6 +88,7 @@ CMD_DECLARE (cmd_spec_header); CMD_DECLARE (cmd_spec_htmlify); CMD_DECLARE (cmd_spec_mirror); +CMD_DECLARE (cmd_spec_prefer_family); CMD_DECLARE (cmd_spec_progress); CMD_DECLARE (cmd_spec_recursive); CMD_DECLARE (cmd_spec_restrict_file_names); @@ -177,6 +178,7 @@ { "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean }, { "postdata", &opt.post_data, cmd_string }, { "postfile", &opt.post_file_name, cmd_file }, + { "preferfamily", NULL, cmd_spec_prefer_family }, { "preservepermissions", &opt.preserve_perm, cmd_boolean }, { "progress", &opt.progress_type, cmd_spec_progress }, { "protocoldirectories", &opt.protocol_directories, cmd_boolean }, @@ -1070,6 +1072,32 @@ opt.remove_listing = 0; } return 1; +} + +/* Validate --prefer-family and set the choice. Allowed values are + "IPv4", "IPv6", and "none". 
*/ + +static int +cmd_spec_prefer_family (const char *com, const char *val, void *closure) +{ + if (0 == strcasecmp (val, "ipv4")) + { + opt.prefer_family = prefer_ipv4; + return 1; + } + else if (0 == strcasecmp (val, "ipv6")) + { + opt.prefer_family = prefer_ipv6; + return 1; + } + else if (0 == strcasecmp (val, "none")) + { + opt.prefer_family = prefer_none; + return 1; + } + fprintf (stderr, _("%s: %s: Invalid preferred family `%s'.\n"), + exec_name, com, val); + return 0; } /* Set progress.type to VAL, but verify that it's a valid progress Index: src/main.c =================================================================== RCS file: /pack/anoncvs/wget/src/main.c,v retrieving revision 1.120 diff -u -r1.120 main.c --- src/main.c 2005/04/21 14:50:45 1.120 +++ src/main.c 2005/04/24 19:36:50 @@ -214,6 +214,7 @@ { "passive-ftp", 0, OPT_BOOLEAN, "passiveftp", -1 }, { "post-data", 0, OPT_VALUE, "postdata", -1 }, { "post-file", 0, OPT_VALUE, "postfile", -1 }, + { "prefer-family", 0, OPT_VALUE, "preferfamily", -1 }, { "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 }, { "progress", 0, OPT_VALUE, "progress", -1 }, { "protocol-directories", 0, OPT_BOOLEAN, "protocoldirectories", -1 }, @@ -465,6 +466,9 @@ -4, --inet4-only connect only to IPv4 addresses.\n"), N_("\ -6, --inet6-only connect only to IPv6 addresses.\n"), + N_("\ + --prefer-family=FAMILY connect first to addresses of specified family,\n\ + one of IPv6, IPv4, or none.\n"), #endif "\n", Index: src/options.h =================================================================== RCS file: /pack/anoncvs/wget/src/options.h,v retrieving revision 1.47 diff -u -r1.47 options.h --- src/options.h 2005/03/19 17:29:25 1.47 +++ src/options.h 2005/04/24 19:36:51 @@ -197,6 +197,12 @@ int ipv4_only; /* IPv4 connections have been requested. */ int ipv6_only; /* IPv4 connections have been requested. 
*/ #endif + enum { + prefer_ipv4, + prefer_ipv6, + prefer_none + } prefer_family; /* preferred address family when more + than one type is available */ }; extern struct options opt; Index: src/utils.c =================================================================== RCS file: /pack/anoncvs/wget/src/utils.c,v retrieving revision 1.93 diff -u -r1.93 utils.c --- src/utils.c 2005/04/23 00:20:13 1.93 +++ src/utils.c 2005/04/24 19:36:54 @@ -1975,3 +1975,49 @@ #undef IS_ASCII #undef IS_BASE64 #undef NEXT_BASE64_CHAR + +/* Simple merge sort for use by stable_sort. Implementation courtesy + Zeljko Vrba. */ + +static void +mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to, + int (*cmpfun) PARAMS ((const void *, const void *))) +{ +#define ELT(array, pos) ((char *)(array) + (pos) * size) + if (from < to) + { + size_t i, j, k; + size_t mid = (to + from) / 2; + mergesort_internal (base, temp, size, from, mid, cmpfun); + mergesort_internal (base, temp, size, mid + 1, to, cmpfun); + i = from; + j = mid + 1; + for (k = from; (i <= mid) && (j <= to); k++) + if (cmpfun (ELT (base, i), ELT (base, j)) <= 0) + memcpy (ELT (temp, k), ELT (base, i++), size); + else + memcpy (ELT (temp, k), ELT (base, j++), size); + while (i <= mid) + memcpy (ELT (temp, k++), ELT (base, i++), size); + while (j <= to) + memcpy (ELT (temp, k++), ELT (base, j++), size); + for (k = from; k <= to; k++) + memcpy (ELT (base, k), ELT (temp, k), size); + } +#undef ELT +} + +/* Stable sort with interface exactly like standard library's qsort. + Uses mergesort internally, allocating temporary storage with + alloca. 
*/ + +void +stable_sort (void *base, size_t nmemb, size_t size, + int (*cmpfun) PARAMS ((const void *, const void *))) +{ + if (size > 1) + { + void *temp = alloca (nmemb * size * sizeof (void *)); + mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun); + } +} Index: src/utils.h =================================================================== RCS file: /pack/anoncvs/wget/src/utils.h,v retrieving revision 1.42 diff -u -r1.42 utils.h --- src/utils.h 2005/04/18 14:23:23 1.42 +++ src/utils.h 2005/04/24 19:36:54 @@ -119,4 +119,7 @@ int base64_encode PARAMS ((const char *, int, char *)); int base64_decode PARAMS ((const char *, char *)); +void stable_sort PARAMS ((void *, size_t, size_t, + int (*) (const void *, const void *))); + #endif /* UTILS_H */
