r5251 - Fix showing non-utf8 encoded torrents in add torre...

svn Sat, 09 May 2009 10:46:17 -0700

Author: andar

Revision: 5251


Log:
        Fix showing non-utf8 encoded torrents in add torrent dialog -- this adds an additional dependency on chardet.

Diff:
Modified: branches/1.1.0_RC/ChangeLog
===================================================================
--- branches/1.1.0_RC/ChangeLog 2009-05-09 03:42:39 UTC (rev 5250)
+++ branches/1.1.0_RC/ChangeLog 2009-05-09 17:46:13 UTC (rev 5251)
@@ -5,6 +5,8 @@
 
 ==== GtkUI ====
   * Fix high cpu usage when displaying speeds in titlebar
+  * Fix showing non-utf8 encoded torrents in add torrent dialog -- this adds
+    an additional dependency on chardet.
 
 ==== WebUI ====
   * Fix starting when -l option is used

Modified: branches/1.1.0_RC/README
===================================================================
--- branches/1.1.0_RC/README    2009-05-09 03:42:39 UTC (rev 5250)
+++ branches/1.1.0_RC/README    2009-05-09 17:46:13 UTC (rev 5251)
@@ -41,7 +41,8 @@
     python-gtk2 python-notify librsvg2-common python-xdg python-support \
     subversion libboost-dev libboost-python-dev libboost-iostreams-dev \
     libboost-thread-dev libboost-date-time-dev libboost-filesystem-dev \
-    libboost-serialization-dev libssl-dev zlib1g-dev python-setuptools
+    libboost-serialization-dev libssl-dev zlib1g-dev python-setuptools \
+    python-chardet
 
 The names of the packages may vary depending on your OS / distro.
 

Modified: branches/1.1.0_RC/deluge/ui/common.py
===================================================================
--- branches/1.1.0_RC/deluge/ui/common.py       2009-05-09 03:42:39 UTC (rev 5250)
+++ branches/1.1.0_RC/deluge/ui/common.py       2009-05-09 17:46:13 UTC (rev 5251)
@@ -1,9 +1,9 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # deluge/ui/common.py
 #
 # Copyright (C) Damien Churchill 2008 <[email protected]>
+# Copyright (C) Andrew Resch 2009 <[email protected]>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -32,9 +32,6 @@
 #    statement from all source files in the program, then also delete it here.
 #
 
-#
-
-
 import os
 try:
     from hashlib import sha1 as sha
@@ -46,6 +43,28 @@
 from deluge.log import LOG as log
 import deluge.configmanager
 
+def decode_string(s, encoding="utf8"):
+    """
+    Decodes a string and re-encodes it in utf8.  If it cannot decode using
+    `:param:encoding` then it will try to detect the string encoding and
+    decode it.
+
+    :param s: str to decode
+    :param encoding: str, the encoding to use in the decoding
+
+    """
+
+    try:
+        s = s.decode(encoding).encode("utf8")
+    except UnicodeDecodeError:
+        try:
+            import chardet
+        except ImportError:
+            s = s.decode(encoding, "replace").encode("utf8")
+        else:
+            s = s.decode(chardet.detect(s)["encoding"]).encode("utf8")
+    return s
+
 class TorrentInfo(object):
     def __init__(self, filename):
         # Get the torrent data from the torrent file
@@ -65,29 +84,31 @@
         elif "codepage" in self.__m_metadata:
             self.encoding = str(self.__m_metadata["codepage"])
 
+        self.__m_name = decode_string(self.__m_metadata["info"]["name"])
+
         # Get list of files from torrent info
         self.__m_files = []
         if self.__m_metadata["info"].has_key("files"):
             prefix = ""
             if len(self.__m_metadata["info"]["files"]) > 1:
-                prefix = self.__m_metadata["info"]["name"].decode(self.encoding, "replace").encode("utf8")
+                prefix = self.__m_name
 
             for f in self.__m_metadata["info"]["files"]:
                 self.__m_files.append({
-                    'path': os.path.join(prefix, *f["path"]).decode(self.encoding, "replace").encode("utf8"),
+                    'path': decode_string(os.path.join(prefix, *f["path"])),
                     'size': f["length"],
                     'download': True
                 })
         else:
             self.__m_files.append({
-                "path": self.__m_metadata["info"]["name"].decode(self.encoding, "replace").encode("utf8"),
+                "path": self.__m_name,
                 "size": self.__m_metadata["info"]["length"],
                 "download": True
         })
 
     @property
     def name(self):
-        return self.__m_metadata["info"]["name"].decode(self.encoding, "replace").encode("utf8")
+        return self.__m_name
 
     @property
     def info_hash(self):

Modified: trunk/README
===================================================================
--- trunk/README        2009-05-09 03:42:39 UTC (rev 5250)
+++ trunk/README        2009-05-09 17:46:13 UTC (rev 5251)
@@ -54,6 +54,7 @@
     pygtk >= 2.10
     librsvg
     xdg-utils
+    chardet
 
   Web:
     mako

Modified: trunk/deluge/ui/common.py
===================================================================
--- trunk/deluge/ui/common.py   2009-05-09 03:42:39 UTC (rev 5250)
+++ trunk/deluge/ui/common.py   2009-05-09 17:46:13 UTC (rev 5251)
@@ -1,9 +1,9 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 #
 # deluge/ui/common.py
 #
 # Copyright (C) Damien Churchill 2008 <[email protected]>
+# Copyright (C) Andrew Resch 2009 <[email protected]>
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -24,18 +24,39 @@
 
 import os
 import sys
+import urlparse
 
 try:
     from hashlib import sha1 as sha
 except ImportError:
     from sha import sha
 
-import urlparse
-
 from deluge import bencode
 from deluge.log import LOG as log
 import deluge.configmanager
 
+def decode_string(s, encoding="utf8"):
+    """
+    Decodes a string and re-encodes it in utf8.  If it cannot decode using
+    `:param:encoding` then it will try to detect the string encoding and
+    decode it.
+
+    :param s: str to decode
+    :param encoding: str, the encoding to use in the decoding
+
+    """
+
+    try:
+        s = s.decode(encoding).encode("utf8")
+    except UnicodeDecodeError:
+        try:
+            import chardet
+        except ImportError:
+            s = s.decode(encoding, "replace").encode("utf8")
+        else:
+            s = s.decode(chardet.detect(s)["encoding"]).encode("utf8")
+    return s
+
 class TorrentInfo(object):
     def __init__(self, filename):
         # Get the torrent data from the torrent file
@@ -52,16 +73,22 @@
         self.encoding = "UTF-8"
         if "encoding" in self.__m_metadata:
             self.encoding = self.__m_metadata["encoding"]
+        elif "codepage" in self.__m_metadata:
+            self.encoding = str(self.__m_metadata["codepage"])
 
+        # We try to decode based on the encoding found and if we can't, we try
+        # to detect the encoding and decode that
+        self.__m_name = decode_string(self.__m_metadata["info"]["name"], self.encoding)
+
         # Get list of files from torrent info
         paths = {}
         if self.__m_metadata["info"].has_key("files"):
             prefix = ""
             if len(self.__m_metadata["info"]["files"]) > 1:
-                prefix = self.__m_metadata["info"]["name"].decode(self.encoding, "replace").encode("utf8")
+                prefix = self.__m_name
 
             for index, f in enumerate(self.__m_metadata["info"]["files"]):
-                path = os.path.join(prefix, *f["path"]).decode(self.encoding, "replace").encode("utf8")
+                path = decode_string(os.path.join(prefix, *f["path"]))
                 f["index"] = index
                 paths[path] = f
 
@@ -75,24 +102,24 @@
             self.__m_files_tree = file_tree.get_tree()
         else:
             self.__m_files_tree = {
-                self.__m_metadata["info"]["name"].decode(self.encoding, "replace").encode("utf8"): (self.__m_metadata["info"]["length"], True)
+                self.__m_name: (self.__m_metadata["info"]["length"], True)
             }
 
         self.__m_files = []
         if self.__m_metadata["info"].has_key("files"):
             prefix = ""
             if len(self.__m_metadata["info"]["files"]) > 1:
-                prefix = self.__m_metadata["info"]["name"].decode(self.encoding, "replace").encode("utf8")
+                prefix = self.__m_name
 
             for f in self.__m_metadata["info"]["files"]:
                 self.__m_files.append({
-                    'path': os.path.join(prefix, *f["path"]).decode(self.encoding, "replace").encode("utf8"),
+                    'path': decode_string(os.path.join(prefix, *f["path"])),
                     'size': f["length"],
                     'download': True
                 })
         else:
             self.__m_files.append({
-                "path": self.__m_metadata["info"]["name"].decode(self.encoding, "replace").encode("utf8"),
+                "path": self.__m_name,
                 "size": self.__m_metadata["info"]["length"],
                 "download": True
         })
@@ -108,7 +135,7 @@
 
     @property
     def name(self):
-        return self.__m_metadata["info"]["name"].decode(self.encoding).encode("utf8")
+        return self.__m_name
 
     @property
     def info_hash(self):



--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"deluge-commit" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to 
[email protected]
For more options, visit this group at 
http://groups.google.com/group/deluge-commit?hl=en
-~----------~----~----~----~------~----~------~--~---

r5251 - Fix showing non-utf8 encoded torrents in add torre...

Reply via email to