The branch, eden, has been updated
via 3e1100676e0e278d01f46f2d39a74bad2d139792 (commit)
from 8f341e3751f06628af6d6c33ca2da9863f04a03e (commit)
- Log -----------------------------------------------------------------
http://xbmc.git.sourceforge.net/git/gitweb.cgi?p=xbmc/scripts;a=commit;h=3e1100676e0e278d01f46f2d39a74bad2d139792
commit 3e1100676e0e278d01f46f2d39a74bad2d139792
Author: Martijn Kaijser <[email protected]>
Date: Tue Sep 18 23:10:11 2012 +0200
[script.module.parsedom] -v1.2.0
diff --git a/script.module.parsedom/addon.xml b/script.module.parsedom/addon.xml
index ac2404f..541798d 100644
--- a/script.module.parsedom/addon.xml
+++ b/script.module.parsedom/addon.xml
@@ -1,5 +1,5 @@
<?xml version='1.0' encoding='UTF-8' standalone='yes'?>
-<addon id='script.module.parsedom' version='1.1.0' name='Parsedom for xbmc plugins' provider-name='TheCollective'>
+<addon id='script.module.parsedom' version='1.2.0' name='Parsedom for xbmc plugins' provider-name='TheCollective'>
<requires>
<import addon='xbmc.python' version='2.0'/>
</requires>
diff --git a/script.module.parsedom/changelog.txt b/script.module.parsedom/changelog.txt
index 7e67f37..97a7234 100644
--- a/script.module.parsedom/changelog.txt
+++ b/script.module.parsedom/changelog.txt
@@ -1,3 +1,7 @@
+[B]Version 1.2.0[/B]
+- fetchPage should default to utf-8 encoding
+- parseDOM should handle utf-8 encoding
+
[B]Version 1.1.0[/B]
- Handle \t that breaks DOM variable extraction
- Added extractJS function
diff --git a/script.module.parsedom/lib/CommonFunctions.py b/script.module.parsedom/lib/CommonFunctions.py
index c72a571..50ce263 100644
--- a/script.module.parsedom/lib/CommonFunctions.py
+++ b/script.module.parsedom/lib/CommonFunctions.py
@@ -24,12 +24,14 @@ import io
import inspect
import time
import HTMLParser
+#import chardet
+import json
-version = "1.1.0"
-plugin = "CommonFunctions-" + version
+version = u"1.2.0"
+plugin = u"CommonFunctions-" + version
print plugin
-USERAGENT = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
+USERAGENT = u"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8"
if hasattr(sys.modules["__main__"], "xbmc"):
xbmc = sys.modules["__main__"].xbmc
@@ -56,13 +58,13 @@ if hasattr(sys.modules["__main__"], "opener"):
# This function raises a keyboard for user input
-def getUserInput(title="Input", default="", hidden=False):
+def getUserInput(title=u"Input", default=u"", hidden=False):
log("", 5)
result = None
# Fix for when this functions is called with default=None
if not default:
- default = ""
+ default = u""
keyboard = xbmc.Keyboard(default, title)
keyboard.setHiddenInput(hidden)
@@ -76,13 +78,13 @@ def getUserInput(title="Input", default="", hidden=False):
# This function raises a keyboard numpad for user input
-def getUserInputNumbers(title="Input", default=""):
+def getUserInputNumbers(title=u"Input", default=u""):
log("", 5)
result = None
# Fix for when this functions is called with default=None
if not default:
- default = ""
+ default = u""
keyboard = xbmcgui.Dialog()
result = keyboard.numeric(0, title, default)
@@ -136,7 +138,7 @@ def stripTags(html):
def _getDOMContent(html, name, match, ret): # Cleanup
log("match: " + match, 3)
- endstr = "</" + name # + ">"
+ endstr = u"</" + name # + ">"
start = html.find(match)
end = html.find(endstr, start)
@@ -153,7 +155,7 @@ def _getDOMContent(html, name, match, ret): # Cleanup
log("start: %s, len: %s, end: %s" % (start, len(match), end), 3)
if start == -1 and end == -1:
- result = ""
+ result = u""
elif start > -1 and end > -1:
result = html[start + len(match):end]
elif end > -1:
@@ -228,18 +230,20 @@ def _getDOMElements(item, name, attrs):
log("Done: " + str(type(lst)), 3)
return lst
-def parseDOM(html, name="", attrs={}, ret=False):
+def parseDOM(html, name=u"", attrs={}, ret=False):
log("Name: " + repr(name) + " - Attrs:" + repr(attrs) + " - Ret: " +
repr(ret) + " - HTML: " + str(type(html)), 3)
- if isinstance(html, str) or isinstance(html, unicode):
+ if isinstance(html, str): # Should be handled
+ html = [html]
+ elif isinstance(html, unicode):
html = [html]
elif not isinstance(html, list):
log("Input isn't list or string/unicode.")
- return ""
+ return u""
if not name.strip():
log("Missing tag name")
- return ""
+ return u""
ret_lst = []
for item in html:
@@ -301,9 +305,9 @@ def extractJS(data, function=False, variable=False, match=False, evaluate=False,
del lst[i]
else:
log("Cleaning item: " + repr(lst[i]), 4)
- if lst[i][0] == "\n":
+ if lst[i][0] == u"\n":
lst[i] == lst[i][1:]
- if lst[i][len(lst) -1] == "\n":
+ if lst[i][len(lst) -1] == u"\n":
lst[i] == lst[i][:len(lst)- 2]
lst[i] = lst[i].strip()
@@ -318,15 +322,15 @@ def extractJS(data, function=False, variable=False, match=False, evaluate=False,
elif variable:
tlst = re.compile(variable +".*?=.*?;", re.M |
re.S).findall(lst[i])
data = []
- for tmp in tlst:
+ for tmp in tlst: # This breaks for some stuff. "ad_tag": "http://ad-emea.doubleclick.net/N4061/pfadx/com.ytpwatch.entertainment/main_563326'' # ends early, must end with }
cont_char = tmp[0]
cont_char = tmp[tmp.find("=") + 1:].strip()
cont_char = cont_char[0]
if cont_char in "'\"":
- log("Using %s as quotation mark" % cont_char, 3)
+ log("Using %s as quotation mark" % cont_char, 1)
tmp = tmp[tmp.find(cont_char) + 1:tmp.rfind(cont_char)]
else:
- log("No quotation mark found", 3)
+ log("No quotation mark found", 1)
tmp = tmp[tmp.find("=") + 1: tmp.rfind(";")]
tmp = tmp.strip()
@@ -344,7 +348,11 @@ def extractJS(data, function=False, variable=False, match=False, evaluate=False,
log("Evaluating %s" % lst[i])
data = lst[i].strip()
try:
- lst[i] = eval(data)
+ try:
+ lst[i] = json.loads(data)
+ except:
+ log("Couldn't json.loads, trying eval")
+ lst[i] = eval(data)
except:
log("Couldn't eval: %s from %s" % (repr(data), repr(lst[i])))
@@ -393,10 +401,13 @@ def fetchPage(params={}):
log("connecting to server...", 1)
con = urllib2.urlopen(request)
- ret_obj["header"] = str(con.info())
+ ret_obj["header"] = con.info()
ret_obj["new_url"] = con.geturl()
- if get("no-content", "false") == "false":
- ret_obj["content"] = con.read()
+ if get("no-content", "false") == u"false" or get("no-content",
"false") == "false":
+ inputdata = con.read()
+ #data_type = chardet.detect(inputdata)
+ #inputdata = inputdata.decode(data_type["encoding"])
+ ret_obj["content"] = inputdata.decode("utf-8")
con.close()
@@ -457,7 +468,7 @@ def makeAscii(data):
return data.encode('ascii', "ignore")
except:
log("Hit except on : " + repr(data))
- s = ""
+ s = u""
for i in data:
try:
i.encode("ascii", "ignore")
@@ -474,14 +485,15 @@ def makeAscii(data):
# This function handles stupid utf handling in python.
def makeUTF8(data):
log(repr(data), 5)
+ return data
try:
- return data.decode('utf8', 'ignore')
+ return data.decode('utf8', 'xmlcharrefreplace') # was 'ignore'
except:
log("Hit except on : " + repr(data))
- s = ""
+ s = u""
for i in data:
try:
- i.decode("utf8", "ignore")
+ i.decode("utf8", "xmlcharrefreplace")
except:
log("Can't convert character", 4)
continue
@@ -491,12 +503,12 @@ def makeUTF8(data):
return s
-def openFile(filepath, options="r"):
+def openFile(filepath, options=u"r"):
log(repr(filepath) + " - " + repr(options))
if options.find("b") == -1: # Toggle binary mode on failure
- alternate = options + "b"
+ alternate = options + u"b"
else:
- alternate = options.replace("b", "")
+ alternate = options.replace(u"b", u"")
try:
log("Trying normal: %s" % options)
@@ -509,6 +521,6 @@ def openFile(filepath, options="r"):
def log(description, level=0):
if dbg and dbglevel > level:
try:
- xbmc.log("[%s] %s : '%s'" % (plugin, inspect.stack()[1][3],
description.encode("utf-8", "ignore")), xbmc.LOGNOTICE)
+ xbmc.log((u"[%s] %s : '%s'" % (plugin, inspect.stack()[1][3],
description)).decode("utf-8"), xbmc.LOGNOTICE)
except:
- xbmc.log("[%s] %s : '%s'" % (plugin, inspect.stack()[1][3],
description), xbmc.LOGNOTICE)
+ xbmc.log(u"FALLBACK [%s] %s : '%s'" % (plugin,
inspect.stack()[1][3], repr(description)), xbmc.LOGNOTICE)
-----------------------------------------------------------------------
Summary of changes:
script.module.parsedom/addon.xml | 2 +-
script.module.parsedom/changelog.txt | 4 +
script.module.parsedom/lib/CommonFunctions.py | 74 ++++++++++++++----------
3 files changed, 48 insertions(+), 32 deletions(-)
hooks/post-receive
--
Scripts
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Xbmc-addons mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/xbmc-addons