From 58a6162ca0b86dbf2497c37b46ac155fc0ac7567 Mon Sep 17 00:00:00 2001
From: Bykov Aleksey <gnfalex@rambler.ru>
Date: Fri, 17 May 2013 23:09:00 +0300
Subject: [PATCH 1/2] Add "--local-filesystem-encoding" option

---
 doc/ChangeLog |  4 ++++
 doc/wget.texi | 10 ++++++++++
 src/ChangeLog | 14 ++++++++++++++
 src/convert.c |  4 ++++
 src/init.c    |  3 ++-
 src/iri.c     | 21 +++++++++++++++++++++
 src/iri.h     |  5 +++++
 src/main.c    |  3 +++
 src/options.h |  1 +
 src/retr.c    |  3 ++-
 src/url.c     | 20 ++++++++++++++++++++
 11 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/doc/ChangeLog b/doc/ChangeLog
index 1b0173b..56cc54a 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2013-05-13  Bykov Aleksey  <gnfalex@rambler.ru>
+
+	* wget.texi (Download Options): Document --local-filesystem-encoding.
+
 2013-05-10  Darshit Shah <darnir@gmail.com>  (tiny change)
 
 	* wget.texi (No of tries): Fix typo to make it clear that --tries
diff --git a/doc/wget.texi b/doc/wget.texi
index c2230a9..4f9a1e1 100644
--- a/doc/wget.texi
+++ b/doc/wget.texi
@@ -1071,6 +1071,16 @@ You can set the default local encoding using the @code{local_encoding}
 command in @file{.wgetrc}. That setting may be overridden from the
 command line.
 
+@cindex local filesystem encoding
+@item --local-filesystem-encoding=@var{encoding}
+
+Force Wget to use @var{encoding} as the default filesystem encoding. That
+affects how Wget converts received file names from @sc{utf-8} to the
+encoding supported by the OS.
+
+If not specified, the value of @code{local_encoding} is used if it is set;
+otherwise @sc{utf-8} is used.
+
 @cindex remote encoding
 @item --remote-encoding=@var{encoding}
 
diff --git a/src/ChangeLog b/src/ChangeLog
index fd037a1..b54f878 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,17 @@
+2013-05-14  Bykov Aleksey  <gnfalex@rambler.ru>
+
+	* convert.c (convert_links_in_hashtable): Also look up the original
+	URL in the hashtable (for file names in the local charset).
+	* retr.c (retrieve_url): Save the original URL of every downloaded
+	file to the hashtable (likewise).
+	* init.c: Add new option "--local-filesystem-encoding".
+	* main.c: Likewise.
+	* options.h: Likewise.
+	* iri.c: Add new functions utf8_to_filesystem, set_filesystem_encoding.
+	* iri.h: Likewise.
+	* url.c (append_uri_pathel): Convert file names to the specified encoding.
+	(append_dir_structure): Convert directory structure to the specified encoding.
+
 2013-05-14  Tim Ruehsen  <tim.ruehsen@gmx.de>
 
 	* cookies.c (cookie_jar_load): Replaced read_whole_file() by getline().
diff --git a/src/convert.c b/src/convert.c
index f5a9cba..77310bc 100644
--- a/src/convert.c
+++ b/src/convert.c
@@ -128,6 +128,10 @@ convert_links_in_hashtable (struct hash_table *downloaded_set,
 	    continue;
 
           local_name = hash_table_get (dl_url_file_map, u->url);
+          /* Also try the pre-encoded URL as the hashtable key.  This is
+             needed for file names with local-language characters.  */
+          if (!local_name)
+            local_name = hash_table_get (dl_url_file_map, cur_url->url->url);
 
           /* Decide on the conversion type.  */
           if (local_name)
diff --git a/src/init.c b/src/init.c
index 54a2919..46f7377 100644
--- a/src/init.c
+++ b/src/init.c
@@ -210,6 +210,7 @@ static const struct {
   { "limitrate",        &opt.limit_rate,        cmd_bytes },
   { "loadcookies",      &opt.cookies_input,     cmd_file },
   { "localencoding",    &opt.locale,            cmd_string },
+  { "localfilesystemencoding",  &opt.encoding_filesystem,cmd_string },
   { "logfile",          &opt.lfilename,         cmd_file },
   { "login",            &opt.ftp_user,          cmd_string },/* deprecated*/
   { "maxredirect",      &opt.max_redirect,      cmd_number },
@@ -389,7 +390,7 @@ defaults (void)
 #endif
   opt.locale = NULL;
   opt.encoding_remote = NULL;
-
+  opt.encoding_filesystem = NULL;
   opt.useservertimestamps = true;
   opt.show_all_dns_entries = false;
 
diff --git a/src/iri.c b/src/iri.c
index 9b16639..957e619 100644
--- a/src/iri.c
+++ b/src/iri.c
@@ -298,6 +298,20 @@ remote_to_utf8 (struct iri *i, const char *str, const char **new)
   return ret;
 }
 
+/* Convert UTF-8 STR to I->filesystem_encoding through do_conversion (output
+   goes to NEW).  Return false if the encoding is unknown or conversion fails.  */
+bool
+utf8_to_filesystem (struct iri *i, const char *str, const char **new)
+{
+  bool ret;
+  iconv_t cd = iconv_open (i->filesystem_encoding, "UTF-8");
+  if (cd == (iconv_t)(-1))
+    return false;  /* No valid descriptor: nothing to convert or close.  */
+  ret = do_conversion (cd, (char *) str, strlen (str), (char **) new);
+  iconv_close (cd);
+  return ret;
+}
+
 /* Allocate a new iri structure and return a pointer to it. */
 struct iri *
 iri_new (void)
@@ -305,6 +319,7 @@ iri_new (void)
   struct iri *i = xmalloc (sizeof *i);
   i->uri_encoding = opt.encoding_remote ? xstrdup (opt.encoding_remote) : NULL;
   i->content_encoding = NULL;
+  i->filesystem_encoding = NULL;
   i->orig_url = NULL;
   i->utf8_encode = opt.enable_iri;
   return i;
@@ -366,3 +381,13 @@ set_content_encoding (struct iri *i, char *charset)
   i->content_encoding = charset ? xstrdup (charset) : NULL;
 }
 
+/* Set I->filesystem_encoding to opt.encoding_filesystem if given, else to
+   opt.locale, else to "UTF-8", so that xstrdup never receives NULL.  */
+void
+set_filesystem_encoding (struct iri *i)
+{
+  const char *enc = opt.encoding_filesystem;
+  if (!enc)
+    enc = opt.locale ? opt.locale : "UTF-8";
+  i->filesystem_encoding = xstrdup (enc);
+}
diff --git a/src/iri.h b/src/iri.h
index e759e45..78dbd35 100644
--- a/src/iri.h
+++ b/src/iri.h
@@ -35,6 +35,7 @@ struct iri {
   char *content_encoding;  /* Encoding of links inside the fetched file */
   char *orig_url;          /* */
   bool utf8_encode;        /* Will/Is the current url encoded in utf8 */
+  char *filesystem_encoding; /* Target encoding for local file names */
 };
 
 #ifdef ENABLE_IRI
@@ -46,11 +47,13 @@ const char *locale_to_utf8 (const char *str);
 char *idn_encode (struct iri *i, char *host);
 char *idn_decode (char *host);
 bool remote_to_utf8 (struct iri *i, const char *str, const char **new);
+bool utf8_to_filesystem (struct iri *i, const char *str, const char **new);
 struct iri *iri_new (void);
 struct iri *iri_dup (const struct iri *);
 void iri_free (struct iri *i);
 void set_uri_encoding (struct iri *i, char *charset, bool force);
 void set_content_encoding (struct iri *i, char *charset);
+void set_filesystem_encoding (struct iri *i);
 
 #else /* ENABLE_IRI */
 
@@ -63,11 +66,13 @@ extern struct iri dummy_iri;
 #define idn_encode(a,b)             NULL
 #define idn_decode(str)             NULL
 #define remote_to_utf8(a,b,c)       false
+#define utf8_to_filesystem(a,b,c)   false
 #define iri_new()                   (&dummy_iri)
 #define iri_dup(a)                  (&dummy_iri)
 #define iri_free(a)
 #define set_uri_encoding(a,b,c)
 #define set_content_encoding(a,b)
+#define set_filesystem_encoding(a)
 
 #endif /* ENABLE_IRI */
 #endif /* IRI_H */
diff --git a/src/main.c b/src/main.c
index 2b42d2d..72d8a19 100644
--- a/src/main.c
+++ b/src/main.c
@@ -232,6 +232,7 @@ static struct cmdline_option option_data[] =
     { "limit-rate", 0, OPT_VALUE, "limitrate", -1 },
     { "load-cookies", 0, OPT_VALUE, "loadcookies", -1 },
     { "local-encoding", 0, OPT_VALUE, "localencoding", -1 },
+    { "local-filesystem-encoding", 0, OPT_VALUE, "localfilesystemencoding", -1 },
     { "max-redirect", 0, OPT_VALUE, "maxredirect", -1 },
     { "method", 0, OPT_VALUE, "method", -1 },
     { "mirror", 'm', OPT_BOOLEAN, "mirror", -1 },
@@ -550,6 +551,9 @@ Download:\n"),
     N_("\
        --remote-encoding=ENC     use ENC as the default remote encoding.\n"),
     N_("\
+       --local-filesystem-encoding=ENC  use ENC as the encoding of local file\n\
+                                 names (default: the --local-encoding value).\n"),
+    N_("\
        --unlink                  remove file before clobber.\n"),
     "\n",
 
diff --git a/src/options.h b/src/options.h
index ed38617..24313fb 100644
--- a/src/options.h
+++ b/src/options.h
@@ -270,6 +270,7 @@ struct options
 
   bool enable_iri;
   char *encoding_remote;
+  char *encoding_filesystem;
   char *locale;
 
   bool trustservernames;
diff --git a/src/retr.c b/src/retr.c
index 9002b0e..fd0e71c 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -933,7 +933,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
     {
       register_download (u->url, local_file);
 
-      if (!opt.spider && redirection_count && 0 != strcmp (origurl, u->url))
+      /* Register ORIGURL even without a redirect; it may differ from u->url.  */
+      if (!opt.spider && 0 != strcmp (origurl, u->url))
         register_redirection (origurl, u->url);
 
       if (*dt & TEXTHTML)
diff --git a/src/url.c b/src/url.c
index 87d6290..4ffe248 100644
--- a/src/url.c
+++ b/src/url.c
@@ -1406,9 +1406,20 @@ append_uri_pathel (const char *b, const char *e, bool escaped,
                    struct growable *dest)
 {
   const char *p;
+
+  char *temp_p;
+  struct iri *iri = NULL;
+
   int quoted, outlen;
 
   int mask;
+
+  if (opt.enable_iri && (opt.encoding_filesystem || opt.locale))
+    {
+      iri= iri_new ();
+      set_filesystem_encoding(iri);
+    }
+
   if (opt.restrict_files_os == restrict_unix)
     mask = filechr_not_unix;
   else
@@ -1426,6 +1437,15 @@ append_uri_pathel (const char *b, const char *e, bool escaped,
       e = unescaped + strlen (unescaped);
     }
 
+  if (iri)
+    if (utf8_to_filesystem (iri,b,(const char **)&temp_p))
+      {
+        b=temp_p;
+        e = b + strlen (b);
+      }
+  if (iri)
+    iri_free (iri);
+
   /* Defang ".." when found as component of path.  Remember that path
      comes from the URL and might contain malicious input.  */
   if (e - b == 2 && b[0] == '.' && b[1] == '.')
-- 
1.8.1.msysgit.1

