[Xbmc-addons] Scripts branch, eden, updated. 3e1100676e0e278d01f46f2d39a74bad2d139792

Martijn Kaijser Tue, 18 Sep 2012 14:10:35 -0700

The branch, eden has been updated
       via  3e1100676e0e278d01f46f2d39a74bad2d139792 (commit)
      from  8f341e3751f06628af6d6c33ca2da9863f04a03e (commit)


- Log -----------------------------------------------------------------
http://xbmc.git.sourceforge.net/git/gitweb.cgi?p=xbmc/scripts;a=commit;h=3e1100676e0e278d01f46f2d39a74bad2d139792

commit 3e1100676e0e278d01f46f2d39a74bad2d139792
Author: Martijn Kaijser <[email protected]>
Date:   Tue Sep 18 23:10:11 2012 +0200

    [script.module.parsedom] -v1.2.0

diff --git a/script.module.parsedom/addon.xml b/script.module.parsedom/addon.xml
index ac2404f..541798d 100644
--- a/script.module.parsedom/addon.xml
+++ b/script.module.parsedom/addon.xml
@@ -1,5 +1,5 @@
 <?xml version='1.0' encoding='UTF-8' standalone='yes'?>
-<addon id='script.module.parsedom' version='1.1.0' name='Parsedom for xbmc plugins' provider-name='TheCollective'>
+<addon id='script.module.parsedom' version='1.2.0' name='Parsedom for xbmc plugins' provider-name='TheCollective'>
   <requires>
     <import addon='xbmc.python' version='2.0'/>
   </requires>
diff --git a/script.module.parsedom/changelog.txt b/script.module.parsedom/changelog.txt
index 7e67f37..97a7234 100644
--- a/script.module.parsedom/changelog.txt
+++ b/script.module.parsedom/changelog.txt
@@ -1,3 +1,7 @@
+[B]Version 1.2.0[/B]
+- fetchPage should default to utf-8 encoding
+- parseDOM should handle utf-8 encoding
+
 [B]Version 1.1.0[/B]
 - Handle \t that breaks DOM variable extraction
 - Added extractJS function
diff --git a/script.module.parsedom/lib/CommonFunctions.py b/script.module.parsedom/lib/CommonFunctions.py
index c72a571..50ce263 100644
--- a/script.module.parsedom/lib/CommonFunctions.py
+++ b/script.module.parsedom/lib/CommonFunctions.py
@@ -24,12 +24,14 @@ import io
 import inspect
 import time
 import HTMLParser
+#import chardet
+import json
 
-version = "1.1.0"
-plugin = "CommonFunctions-" + version
+version = u"1.2.0"
+plugin = u"CommonFunctions-" + version
 print plugin
 
-USERAGENT = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
+USERAGENT = u"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
 
 if hasattr(sys.modules["__main__"], "xbmc"):
     xbmc = sys.modules["__main__"].xbmc
@@ -56,13 +58,13 @@ if hasattr(sys.modules["__main__"], "opener"):
 
 
 # This function raises a keyboard for user input
-def getUserInput(title="Input", default="", hidden=False):
+def getUserInput(title=u"Input", default=u"", hidden=False):
     log("", 5)
     result = None
 
     # Fix for when this functions is called with default=None
     if not default:
-        default = ""
+        default = u""
 
     keyboard = xbmc.Keyboard(default, title)
     keyboard.setHiddenInput(hidden)
@@ -76,13 +78,13 @@ def getUserInput(title="Input", default="", hidden=False):
 
 
 # This function raises a keyboard numpad for user input
-def getUserInputNumbers(title="Input", default=""):
+def getUserInputNumbers(title=u"Input", default=u""):
     log("", 5)
     result = None
 
     # Fix for when this functions is called with default=None
     if not default:
-        default = ""
+        default = u""
 
     keyboard = xbmcgui.Dialog()
     result = keyboard.numeric(0, title, default)
@@ -136,7 +138,7 @@ def stripTags(html):
 def _getDOMContent(html, name, match, ret):  # Cleanup
     log("match: " + match, 3)
 
-    endstr = "</" + name  # + ">"
+    endstr = u"</" + name  # + ">"
 
     start = html.find(match)
     end = html.find(endstr, start)
@@ -153,7 +155,7 @@ def _getDOMContent(html, name, match, ret):  # Cleanup
 
     log("start: %s, len: %s, end: %s" % (start, len(match), end), 3)
     if start == -1 and end == -1:
-        result = ""
+        result = u""
     elif start > -1 and end > -1:
         result = html[start + len(match):end]
     elif end > -1:
@@ -228,18 +230,20 @@ def _getDOMElements(item, name, attrs):
     log("Done: " + str(type(lst)), 3)
     return lst
 
-def parseDOM(html, name="", attrs={}, ret=False):
+def parseDOM(html, name=u"", attrs={}, ret=False):
     log("Name: " + repr(name) + " - Attrs:" + repr(attrs) + " - Ret: " + repr(ret) + " - HTML: " + str(type(html)), 3)
 
-    if isinstance(html, str) or isinstance(html, unicode):
+    if isinstance(html, str): # Should be handled
+        html = [html]
+    elif isinstance(html, unicode):
         html = [html]
     elif not isinstance(html, list):
         log("Input isn't list or string/unicode.")
-        return ""
+        return u""
 
     if not name.strip():
         log("Missing tag name")
-        return ""
+        return u""
 
     ret_lst = []
     for item in html:
@@ -301,9 +305,9 @@ def extractJS(data, function=False, variable=False, match=False, evaluate=False,
             del lst[i]
         else:
             log("Cleaning item: " + repr(lst[i]), 4)
-            if lst[i][0] == "\n":
+            if lst[i][0] == u"\n":
                 lst[i] == lst[i][1:]
-            if lst[i][len(lst) -1] == "\n":
+            if lst[i][len(lst) -1] == u"\n":
                 lst[i] == lst[i][:len(lst)- 2]
             lst[i] = lst[i].strip()
 
@@ -318,15 +322,15 @@ def extractJS(data, function=False, variable=False, match=False, evaluate=False,
             elif variable:
                 tlst = re.compile(variable +".*?=.*?;", re.M | re.S).findall(lst[i])
                 data = []
-                for tmp in tlst:
+                for tmp in tlst: # This breaks for some stuff. "ad_tag": "http://ad-emea.doubleclick.net/N4061/pfadx/com.ytpwatch.entertainment/main_563326'' # ends early, must end with }
                     cont_char = tmp[0]
                     cont_char = tmp[tmp.find("=") + 1:].strip()
                     cont_char = cont_char[0]
                     if cont_char in "'\"":
-                        log("Using %s as quotation mark" % cont_char, 3)
+                        log("Using %s as quotation mark" % cont_char, 1)
                         tmp = tmp[tmp.find(cont_char) + 1:tmp.rfind(cont_char)]
                     else:
-                        log("No quotation mark found", 3)
+                        log("No quotation mark found", 1)
                         tmp = tmp[tmp.find("=") + 1: tmp.rfind(";")]
 
                     tmp = tmp.strip()
@@ -344,7 +348,11 @@ def extractJS(data, function=False, variable=False, match=False, evaluate=False,
             log("Evaluating %s" % lst[i])
             data = lst[i].strip()
             try:
-                lst[i] = eval(data)
+                try:
+                    lst[i] = json.loads(data)
+                except:
+                    log("Couldn't json.loads, trying eval")
+                    lst[i] = eval(data)
             except:
                 log("Couldn't eval: %s from %s" % (repr(data), repr(lst[i])))
 
@@ -393,10 +401,13 @@ def fetchPage(params={}):
         log("connecting to server...", 1)
 
         con = urllib2.urlopen(request)
-        ret_obj["header"] = str(con.info())
+        ret_obj["header"] = con.info()
         ret_obj["new_url"] = con.geturl()
-        if get("no-content", "false") == "false":
-            ret_obj["content"] = con.read()
+        if get("no-content", "false") == u"false" or get("no-content", "false") == "false":
+            inputdata = con.read()
+            #data_type = chardet.detect(inputdata)
+            #inputdata = inputdata.decode(data_type["encoding"])
+            ret_obj["content"] = inputdata.decode("utf-8")
 
         con.close()
 
@@ -457,7 +468,7 @@ def makeAscii(data):
         return data.encode('ascii', "ignore")
     except:
         log("Hit except on : " + repr(data))
-        s = ""
+        s = u""
         for i in data:
             try:
                 i.encode("ascii", "ignore")
@@ -474,14 +485,15 @@ def makeAscii(data):
 # This function handles stupid utf handling in python.
 def makeUTF8(data):
     log(repr(data), 5)
+    return data
     try:
-        return data.decode('utf8', 'ignore')
+        return data.decode('utf8', 'xmlcharrefreplace') # was 'ignore'
     except:
         log("Hit except on : " + repr(data))
-        s = ""
+        s = u""
         for i in data:
             try:
-                i.decode("utf8", "ignore")
+                i.decode("utf8", "xmlcharrefreplace") 
             except:
                 log("Can't convert character", 4)
                 continue
@@ -491,12 +503,12 @@ def makeUTF8(data):
         return s
 
 
-def openFile(filepath, options="r"):
+def openFile(filepath, options=u"r"):
     log(repr(filepath) + " - " + repr(options))
     if options.find("b") == -1:  # Toggle binary mode on failure
-        alternate = options + "b"
+        alternate = options + u"b"
     else:
-        alternate = options.replace("b", "")
+        alternate = options.replace(u"b", u"")
 
     try:
         log("Trying normal: %s" % options)
@@ -509,6 +521,6 @@ def openFile(filepath, options="r"):
 def log(description, level=0):
     if dbg and dbglevel > level:
         try:
-            xbmc.log("[%s] %s : '%s'" % (plugin, inspect.stack()[1][3], description.encode("utf-8", "ignore")), xbmc.LOGNOTICE)
+            xbmc.log((u"[%s] %s : '%s'" % (plugin, inspect.stack()[1][3], description)).decode("utf-8"), xbmc.LOGNOTICE)
         except:
-            xbmc.log("[%s] %s : '%s'" % (plugin, inspect.stack()[1][3], description), xbmc.LOGNOTICE)
+            xbmc.log(u"FALLBACK [%s] %s : '%s'" % (plugin, inspect.stack()[1][3], repr(description)), xbmc.LOGNOTICE)

-----------------------------------------------------------------------

Summary of changes:
 script.module.parsedom/addon.xml              |    2 +-
 script.module.parsedom/changelog.txt          |    4 +
 script.module.parsedom/lib/CommonFunctions.py |   74 ++++++++++++++----------
 3 files changed, 48 insertions(+), 32 deletions(-)


hooks/post-receive
-- 
Scripts

------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Xbmc-addons mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/xbmc-addons

[Xbmc-addons] Scripts branch, eden, updated. 3e1100676e0e278d01f46f2d39a74bad2d139792

Reply via email to