The branch, frodo has been updated
via 902107ea213b7de2b0ff890d22f3607861d74846 (commit)
from 23a3fafc84e850dd1f4545f061a00c849da1633d (commit)
- Log -----------------------------------------------------------------
http://xbmc.git.sourceforge.net/git/gitweb.cgi?p=xbmc/plugins;a=commit;h=902107ea213b7de2b0ff890d22f3607861d74846
commit 902107ea213b7de2b0ff890d22f3607861d74846
Author: beenje <[email protected]>
Date: Mon Apr 29 22:55:35 2013 +0200
[plugin.video.myvideo_de] updated to version 0.9.0
diff --git a/plugin.video.myvideo_de/addon.py b/plugin.video.myvideo_de/addon.py
index b928e34..c1becb0 100644
--- a/plugin.video.myvideo_de/addon.py
+++ b/plugin.video.myvideo_de/addon.py
@@ -29,6 +29,13 @@ STRINGS = {
'no_download_path': 30030,
'set_now?': 30031,
'hls_error': 30032,
+ 'show_my_favs': 30002,
+ 'no_scraper_found': 30003,
+ 'add_to_my_favs': 30004,
+ 'del_from_my_favs': 30005,
+ 'no_my_favs': 30006,
+ 'use_context_menu': 30007,
+ 'to_add': 30008,
}
plugin = Plugin()
@@ -39,14 +46,18 @@ def show_categories():
items = [{
'label': category['title'],
'path': plugin.url_for(
- endpoint='show_subcategories',
+ endpoint='show_path',
path=category['path']
)
} for category in scraper.get_categories()]
items.append({
'label': _('search'),
- 'path': plugin.url_for('video_search')}
- )
+ 'path': plugin.url_for('video_search')
+ })
+ items.append({
+ 'label': _('show_my_favs'),
+ 'path': plugin.url_for('show_my_favs')
+ })
return plugin.finish(items)
@@ -64,83 +75,107 @@ def video_search():
@plugin.route('/search/<search_string>/')
def video_search_result(search_string):
- items = scraper.get_search_result(search_string)
- return __add_items(items)
+ path = scraper.get_search_path(search_string)
+ return show_path(path)
-@plugin.route('/category/<path>/')
-def show_subcategories(path):
- categories = scraper.get_sub_categories(path)
- items = [{
- 'label': category['title'],
- 'path': plugin.url_for(
- endpoint='show_path',
- path=category['path']
- )
- } for category in categories]
+@plugin.route('/my_favs/')
+def show_my_favs():
+
+ def context_menu(item_path):
+ context_menu = [(
+ _('del_from_my_favs'),
+ 'XBMC.RunPlugin(%s)' % plugin.url_for('del_from_my_favs',
+ item_path=item_path),
+ )]
+ return context_menu
+
+ my_fav_items = plugin.get_storage('my_fav_items')
+ items = my_fav_items.values()
+ for item in items:
+ item['context_menu'] = context_menu(item['path'])
+ if not items:
+ dialog = xbmcgui.Dialog()
+ dialog.ok(_('no_my_favs'), _('use_context_menu'), _('to_add'))
+ return
return plugin.finish(items)
-@plugin.route('/<path>/')
+@plugin.route('/path/<path>/')
def show_path(path):
- items = scraper.get_path(path)
- return __add_items(items)
+ try:
+ items, next_page, prev_page = scraper.get_path(path)
+ except NotImplementedError:
+ plugin.notify(msg=_('no_scraper_found'), title='Path: %s' % path)
+ else:
+ return __add_items(items, next_page, prev_page)
+
+def __add_items(entries, next_page=None, prev_page=None):
+ my_fav_items = plugin.get_storage('my_fav_items')
-def __add_items(entries):
+ def context_menu(item_path, video_id):
+ if not item_path in my_fav_items:
+ context_menu = [(
+ _('add_to_my_favs'),
+ 'XBMC.RunPlugin(%s)' % plugin.url_for(
+ endpoint='add_to_my_favs',
+ item_path=item_path
+ ),
+ )]
+ else:
+ context_menu = [(
+ _('del_from_my_favs'),
+ 'XBMC.RunPlugin(%s)' % plugin.url_for(
+ endpoint='del_from_my_favs',
+ item_path=item_path
+ ),
+ )]
+ if video_id:
+ download_url = plugin.url_for(
+ endpoint='download_video',
+ video_id=video_id
+ )
+ context_menu.append(
+ (_('download'), 'XBMC.RunPlugin(%s)' % download_url)
+ )
+ return context_menu
+
+ temp_items = plugin.get_storage('temp_items')
+ temp_items.clear()
items = []
- update_on_pageswitch = plugin.get_setting('update_on_pageswitch') == 'true'
has_icons = False
- is_update = False
- for entry in entries:
+ i = 0
+ for i, entry in enumerate(entries):
if not has_icons and entry.get('thumb'):
has_icons = True
- if entry.get('pagenination'):
- if entry['pagenination'] == 'PREV':
- if update_on_pageswitch:
- is_update = True
- title = '<< %s %s <<' % (_('page'), entry['title'])
- elif entry['pagenination'] == 'NEXT':
- title = '>> %s %s >>' % (_('page'), entry['title'])
- items.append({
- 'label': title,
- 'thumbnail': 'DefaultFolder.png',
- 'path': plugin.url_for(
- endpoint='show_path',
- path=entry['path']
- )
- })
- elif entry['is_folder']:
+ if entry['is_folder']:
items.append({
'label': entry['title'],
'thumbnail': entry.get('thumb', 'DefaultFolder.png'),
+ 'info': {'count': i + 1},
'path': plugin.url_for(
endpoint='show_path',
path=entry['path']
)
})
else:
- download_url = plugin.url_for(
- endpoint='download_video',
- video_id=entry['video_id']
- )
items.append({
'label': entry['title'],
'thumbnail': entry.get('thumb', 'DefaultVideo.png'),
'info': {
+ 'video_id': entry['video_id'],
+ 'count': i + 1,
'plot': entry.get('description', ''),
- 'studio': entry.get('username', ''),
+ 'studio': entry.get('author', {}).get('name', ''),
'date': entry.get('date', ''),
'year': int(entry.get('year', 0)),
'rating': float(entry.get('rating', 0)),
'votes': unicode(entry.get('votes')),
'views': unicode(entry.get('views', 0))
},
- 'context_menu': [
- (_('download'), 'XBMC.RunPlugin(%s)' % download_url),
- ],
'stream_info': {
- 'video': {'duration': entry.get('length', 0)}
+ 'video': {'duration': entry.get('duration', 0)}
},
'is_playable': True,
'path': plugin.url_for(
@@ -148,11 +183,43 @@ def __add_items(entries):
video_id=entry['video_id']
)
})
+ if prev_page:
+ items.append({
+ 'label': '<< %s %s <<' % (_('page'), prev_page['number']),
+ 'info': {'count': 0},
+ 'thumbnail': 'DefaultFolder.png',
+ 'path': plugin.url_for(
+ endpoint='show_path',
+ path=prev_page['path'],
+ update='true',
+ )
+ })
+ if next_page:
+ items.append({
+ 'label': '>> %s %s >>' % (_('page'), next_page['number']),
+ 'thumbnail': 'DefaultFolder.png',
+ 'info': {'count': i + 2},
+ 'path': plugin.url_for(
+ endpoint='show_path',
+ path=next_page['path'],
+ update='true',
+ )
+ })
+
+ for item in items:
+ temp_items[item['path']] = item
+ item['context_menu'] = context_menu(
+ item['path'], item['info'].get('video_id')
+ )
+ temp_items.sync()
+
+ update_on_pageswitch = plugin.get_setting('update_on_pageswitch', bool)
+ is_update = update_on_pageswitch and 'update' in plugin.request.args
finish_kwargs = {
- #'sort_methods': ('UNSORTED', 'RATING', 'RUNTIME'),
+ 'sort_methods': ('playlist_order', ),
'update_listing': is_update
}
- if has_icons and plugin.get_setting('force_viewmode') == 'true':
+ if has_icons and plugin.get_setting('force_viewmode', bool):
finish_kwargs['view_mode'] = 'thumbnail'
return plugin.finish(items, **finish_kwargs)
@@ -174,7 +241,7 @@ def download_video(video_id):
if 'hls_playlist' in video:
plugin.notify(_('Download not supported'))
return
- if not video['rtmpurl']:
+ elif not video['rtmpurl']:
params = {
'url': video['filepath'] + video['file'],
}
@@ -199,7 +266,7 @@ def watch_video(video_id):
if 'hls_playlist' in video:
__log('watch_video using HLS')
video_url = video['hls_playlist']
- if not video['rtmpurl']:
+ elif not video['rtmpurl']:
__log('watch_video using FLV')
video_url = video['filepath'] + video['file']
__log('wget %s' % video_url)
@@ -225,6 +292,22 @@ def watch_video(video_id):
return plugin.set_resolved_url(video_url)
+@plugin.route('/my_favs/add/<item_path>')
+def add_to_my_favs(item_path):
+ my_fav_items = plugin.get_storage('my_fav_items')
+ temp_items = plugin.get_storage('temp_items')
+ my_fav_items[item_path] = temp_items[item_path]
+ my_fav_items.sync()
+
+
+@plugin.route('/my_favs/del/<item_path>')
+def del_from_my_favs(item_path):
+ my_fav_items = plugin.get_storage('my_fav_items')
+ if item_path in my_fav_items:
+ del my_fav_items[item_path]
+ my_fav_items.sync()
+
+
def __keyboard(title, text=''):
keyboard = xbmc.Keyboard(text, title)
keyboard.doModal()
@@ -254,5 +337,3 @@ if __name__ == '__main__':
plugin.run()
except scraper.NetworkError:
plugin.notify(msg=_('network_error'))
- except NotImplementedError:
- plugin.notify(msg=_('hls_error'))
diff --git a/plugin.video.myvideo_de/addon.xml b/plugin.video.myvideo_de/addon.xml
index ee406a5..c049fc6 100644
--- a/plugin.video.myvideo_de/addon.xml
+++ b/plugin.video.myvideo_de/addon.xml
@@ -1,5 +1,5 @@
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<addon id="plugin.video.myvideo_de" name="MyVideo.de" version="0.2.2" provider-name="Tristan Fischer ([email protected])">
+<addon id="plugin.video.myvideo_de" name="MyVideo.de" version="0.9.0" provider-name="Tristan Fischer ([email protected])">
<requires>
<import addon="xbmc.python" version="2.1.0"/>
<import addon="script.module.xbmcswift2" version="2.4.0"/>
diff --git a/plugin.video.myvideo_de/changelog.txt b/plugin.video.myvideo_de/changelog.txt
index 18713f5..77ed995 100644
--- a/plugin.video.myvideo_de/changelog.txt
+++ b/plugin.video.myvideo_de/changelog.txt
@@ -1,3 +1,13 @@
+0.9.0 (29.04.2013)
+ - code rewrite (class based scrapers)
+ - "My Favorites"- Feature
+ You can now add items or folders to the "My Favorites" folder
+ - new visible plugin structure to match the new website structure
+ - fixed Playback (FLV, HLS, RTMP, RTMPE)
+ - fixed some categories
+ - new infolabels (uploader-username, plot)
+ - better thumbnails on some videos
+
0.2.2 (11.04.2013)
- added possibility to play HLS videos
- fixed thumbnails in fullscreen OSD
diff --git a/plugin.video.myvideo_de/resources/language/English/strings.xml b/plugin.video.myvideo_de/resources/language/English/strings.xml
index c444849..9b44f44 100644
--- a/plugin.video.myvideo_de/resources/language/English/strings.xml
+++ b/plugin.video.myvideo_de/resources/language/English/strings.xml
@@ -7,6 +7,13 @@
<!-- General entries -->
<string id="30000">Page</string>
<string id="30001">Search</string>
+ <string id="30002">[B]My Favorites[/B]</string>
+ <string id="30003">No Scraper Found</string>
+ <string id="30004">[B]Add to My Favorites[/B]</string>
+ <string id="30005">[B]Del from My Favorites[/B]</string>
+ <string id="30006">No Favorites</string>
+ <string id="30007">You need to use the context menu</string>
+ <string id="30008">to add items or folders.</string>
<!-- Context Menu entries -->
<string id="30020">Download Video</string>
diff --git a/plugin.video.myvideo_de/resources/language/German/strings.xml b/plugin.video.myvideo_de/resources/language/German/strings.xml
index e572872..14784a3 100644
--- a/plugin.video.myvideo_de/resources/language/German/strings.xml
+++ b/plugin.video.myvideo_de/resources/language/German/strings.xml
@@ -7,6 +7,13 @@
<!-- General entries -->
<string id="30000">Seite</string>
<string id="30001">Suche</string>
+ <string id="30002">[B]Meine Favoriten[/B]</string>
+ <string id="30003">No Scraper Found</string>
+ <string id="30004">[B]Zu Meinen Favoriten[/B]</string>
+ <string id="30005">[B]Aus Meinen Favoriten entfernen[/B]</string>
+ <string id="30006">Keine Favoriten hinzugefügt</string>
+ <string id="30007">Bitte benutze das Kontext-Menu um</string>
+ <string id="30008">Videos oder Ordner hinzuzufügen.</string>
<!-- Context Menu entries -->
<string id="30020">Video herunterladen</string>
diff --git a/plugin.video.myvideo_de/resources/lib/scraper.py b/plugin.video.myvideo_de/resources/lib/scraper.py
index 9530c48..8aca88d 100644
--- a/plugin.video.myvideo_de/resources/lib/scraper.py
+++ b/plugin.video.myvideo_de/resources/lib/scraper.py
@@ -1,7 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
-# Copyright (C) 2012 Tristan Fischer ([email protected])
+# Copyright (C) 2013 Tristan Fischer ([email protected])
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -27,10 +27,6 @@ from urllib2 import urlopen, Request, HTTPError, URLError
MAIN_URL = 'http://www.myvideo.de/'
-UA = (
- 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 '
- '(KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11'
-)
GK = (
'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
@@ -40,25 +36,12 @@ GK = (
CATEGORIES = (
{'title': 'Top 100', 'path': 'Top_100'},
{'title': 'Videos', 'path': 'Videos_A-Z'},
- {'title': 'Serien', 'path': 'Serien'},
+ {'title': 'Community', 'path': 'Videos_A-Z/Videos_in_Kategorien'},
+ {'title': 'TV', 'path': 'Serien'},
{'title': 'Filme', 'path': 'Filme'},
{'title': 'Musik', 'path': 'Musik'}
)
-BLOCKED_SUBCATS = (
- '/Videos_A-Z/Video_Flight',
- '/Videos_A-Z/Videos_in_Playlisten',
- '/musik-tv',
- '/channel/Clipgenerator',
- '/echo',
- '/Themen/Sexy',
- '/Top_100/Top_100_Playlisten',
- '/Serien/WWE',
- '/Serien/Serien_Suche',
- '/channel/unforgettable',
- '/webstarts'
-)
-
R_ID = re.compile('watch/([0-9]+)/?')
@@ -70,82 +53,469 @@ def get_categories():
return CATEGORIES
-def get_sub_categories(path):
- __log('get_sub_categories started with path: %s' % path)
- tree = __get_tree(MAIN_URL)
- section = tree.find('div', {'class': 'body topNavFW'})
- sub_cats = []
- link = section.find('a', {'href': '/%s' % path})
- if link:
- for l in link.parent.findAll('a', {'class': 'topsub nArrow'}):
- if l['href'] in BLOCKED_SUBCATS:
- __log('skipping entry with link: %s' % l['href'])
- continue
- elif '/watch/' in l['href']:
- __log('skipping playable entry with link: %s' % l['href'])
- continue
- sub_cats.append({
- 'title': l.span.string.strip(),
- 'path': l['href'][1:]}
- )
- __log('get_sub_categories finished with %d elements' % len(sub_cats))
- return sub_cats
-
-
-def get_search_result(query):
- __log('get_search_result started with path: %s' % query)
+def get_search_path(query):
+ log('get_search_result started with path: %s' % query)
path = '/Videos_A-Z?%s' % urlencode({'searchWord': query})
- items = get_path(path)
- return items
+ return path
+
+
+class BaseScraper(object):
+
+ # Todo Modifiers (Heute, Woche, Monat, ...)
+
+ path_matches = []
+
+ pagination_section_props = []
+ next_page_props = []
+ prev_page_props = []
+
+ subtree_props = []
+ section_props = []
+
+ a_props = []
+ img_props = []
+ duration_props = []
+ author_props = []
+ date_props = []
+
+ needs_cookie = False
+
+ @classmethod
+ def choose_scraper(cls, path):
+ log('Trying to find a matching scraper class for path: "%s"' % path)
+ for subcls in cls.__subclasses__():
+ for path_match in subcls.path_matches:
+ if path_match in path:
+ return subcls()
+
+ def parse(self, tree):
+ sections = self.get_sections(tree)
+ if not sections:
+ print 'Found no sections :('
+ items = (self.parse_item(section) for section in sections)
+ # Need this double generator pass to filter out skipped items
+ items = (i for i in items if i)
+ next_page, prev_page = self.parse_pagination(tree)
+ return items, next_page, prev_page
+
+ def get_sections(self, tree):
+ if self.subtree_props:
+ subtree = tree.find(*self.subtree_props)
+ if subtree:
+ print 'found subtree'
+ tree = subtree
+ sections = tree.findAll(*self.section_props)
+ #print 'sections: %s' % sections
+ return sections
+
+ def parse_item(self, section):
+ a = section.find(*self.a_props)
+ if not a:
+ log('Skipping item: %s' % section)
+ return
+ path = a['href']
+ is_folder, video_id = self.detect_folder(path)
+ item = {
+ 'title': a['title'] or a.string,
+ 'path': path,
+ 'is_folder': is_folder,
+ 'video_id': video_id,
+ }
+ if self.img_props:
+ item['thumb'] = self.get_img(section)
+ if self.duration_props:
+ duration_elem = section.find(*self.duration_props)
+ if duration_elem and duration_elem.string:
+ item['duration'] = self.format_length(duration_elem.string)
+ if self.author_props:
+ author_elem = section.find(*self.author_props)
+ if author_elem and author_elem.a:
+ item['author'] = {
+ 'name': author_elem.a['title'],
+ 'id': author_elem.a['href'].rsplit('=')[-1]
+ }
+ if self.date_props:
+ date_elem = section.find(*self.date_props)
+ if date_elem and date_elem.string:
+ item['date'] = date_elem.string
+ return item
+
+ def get_img(self, section):
+ img = section.find(*self.img_props)
+ if img:
+ return img.get('longdesc') or img.get('src')
+ else:
+ print 'Error in get_img!'
+
+ def parse_pagination(self, tree):
+
+ def get_path(a_elem):
+ if a['href'] == '#':
+ re_path = re.compile('.src=\'(.*?)\'')
+ path = re_path.search(a['onclick']).group(1)
+ else:
+ path = a['href']
+ return {
+ 'number': a['title'],
+ 'path': path
+ }
+
+ next_page = prev_page = None
+ if self.pagination_section_props:
+ section = tree.find(*self.pagination_section_props)
+ if section:
+ print 'found pagination section'
+ if self.next_page_props:
+ a = section.find(*self.next_page_props)
+ if a:
+ print 'found pagenination next link'
+ next_page = get_path(a)
+ if self.prev_page_props:
+ a = section.find(*self.prev_page_props)
+ if a:
+ print 'found pagenination prev link'
+ prev_page = get_path(a)
+ return next_page, prev_page
+
+ @staticmethod
+ def detect_folder(path):
+ video_id = None
+ is_folder = True
+ m_id = re.search(R_ID, path)
+ if m_id:
+ video_id = m_id.group(1)
+ is_folder = False
+ return is_folder, video_id
+
+ @staticmethod
+ def format_length(length_str):
+ if ' min' in length_str or ' Std.' in length_str:
+ h = m = s = '0'
+ if ' min' in length_str:
+ m, s = length_str.replace(' min', '').split(':')
+ elif ' Std.' in length_str:
+ h, m, s = length_str.replace(' Std.', '').split(':')
+ seconds = int(h) * 3600 + int(m) * 60 + int(s)
+ return seconds
+ return 0
+
+ def log(self):
+ print('MyVideo.de scraper: %s' % msg)
+
+# FIXME re.compile -> r''
+# FIXME turn show name if 'Staffel' in title
+# FIXME Rating/Votes
+# FIXME Plot
+
+
+# Needs to be before TopCategoryScraper
+class TopScraper(BaseScraper):
+ path_matches = ('Top_100/', )
+ subtree_props = ('div', {'class': 'lContent'})
+ section_props = ('div', {'class': re.compile('vThumb')})
+ a_props = ('a', )
+ img_props = ('img', )
+ duration_props = ('span', {'class': 'vViews'})
+ author_props = ('span', {'class': 'nick'})
+
+
+class TopCategoryScraper(BaseScraper):
+ path_matches = ('Top_100', )
+ section_props = ('div', {'id': re.compile('id_[0-9]+_init')})
+ title_div_props = ('div', {'class': re.compile('entry-title hidden')})
+ path_td_props = ('td', {'class': re.compile('shAll')})
+
+ def parse_item(self, section):
+ title_div = section.find(*self.title_div_props)
+ path_td = section.find(*self.path_td_props)
+ path = path_td.a['href']
+ is_folder, video_id = self.detect_folder(path)
+ item = {
+ 'title': title_div.string.strip(),
+ 'path': path,
+ 'is_folder': is_folder,
+ 'video_id': video_id,
+ }
+ return item
+
+
+# Needs to be before VideoScraper
+class VideoCategoryScraper(BaseScraper):
+ path_matches = ('Videos_in_Kategorien', )
+ subtree_props = ('div', {'class': 'lContent'})
+ section_props = ('div', {'class': 'body floatLeft cGround sTLeft'})
+ a_props = ('div', {'class': 'sCenter kTitle'})
+ img_props = ('img', {'class': 'vThumb kThumb'})
+
+ def parse_item(self, section):
+ div = section.find(*self.a_props)
+ if not div:
+ log('Skipping item: %s' % section)
+ return
+ path = div.a['href']
+ is_folder, video_id = self.detect_folder(path)
+ item = {
+ 'title': div.a['title'] or div.a.string,
+ 'path': path,
+ 'is_folder': is_folder,
+ 'video_id': video_id,
+ }
+ if self.img_props:
+ item['thumb'] = self.get_img(section)
+ return item
+
+
+class VideoScraper(BaseScraper):
+ path_matches = ('Videos_A-Z', 'Videos_A-Z?', 'Neue_Musik_Videos')
+ pagination_section_props = ('div', {'class': 'pViewBottom'})
+ next_page_props = ('a', {'class': 'pView pnNext'})
+ prev_page_props = ('a', {'class': 'pView pnBack'})
+ subtree_props = ('div', {'class': 'lContent'})
+ section_props = ('div', {'class': re.compile('entry-content')})
+ a_props = ('a', {'class': 'vLink'})
+ img_props = ('img', )
+ duration_props = ('span', {'class': 'vViews'})
+ author_props = ('span', {'class': 'nick'})
+ date_props = ('div', {'class': re.compile('vAdded')})
+
+
+# Needs to be BEFORE ShowOverviewScraper
+class AllShowOverviewScraper(BaseScraper):
+ path_matches = ('Serien/Alle_Serien_A-Z', )
+ subtree_props = ('div', {'class': 'lContent'})
+ section_props = ('div', {'class': 'lBox seriesDetail'})
+ a_props = ('a', )
+ img_props = ('img', {'class': 'vThumb'})
+ needs_cookie = True
+
+ def parse_item(self, section):
+ previous_section = section.previousSibling
+ path = previous_section.a['href']
+ is_folder, video_id = self.detect_folder(path)
+ item = {
+ 'title': previous_section.a.string,
+ 'path': path,
+ 'is_folder': is_folder,
+ 'video_id': video_id,
+ 'thumb': section.find(*self.img_props)['longdesc']
+ }
+ return item
+
+
+class ShowOverviewScraper(BaseScraper):
+ path_matches = ('Serien/', )
+ subtree_props = ('div', {'class': 'lContent'})
+ section_props = ('div', {'class': re.compile('series_member')})
+ a_props = ('a', {'class': 'series_head'})
+ img_props = ('img', {'class': 'vThumb'})
+ # FIXME: Ganze-Folge property
+
+
+class ShowCategoryScraper(BaseScraper):
+ path_matches = ('Serien', )
+
+ def parse(self, tree):
+ sub_categories = [
+ ('Top Serien', 'Top_100/Top_100_Serien'),
+ ('Alle Serien', '/Serien/Alle_Serien_A-Z'),
+ (' ProSieben', '/Serien/ProSieben'),
+ (' Sat 1', '/Serien/Sat_1'),
+ (' Anime TV', '/Serien/Anime_TV'),
+ (' kabel eins', '/Serien/kabel_eins'),
+ (' sixx', '/Serien/sixx'),
+ (' Sony Retro', '/Serien/Sony_Retro'),
+ (' Your Family Entertainment', '/Serien/Your_Family_Entertainment'),
+ (' Welt der Wunder', '/Serien/Welt_der_Wunder'),
+ ('Weitere Serien', '/Serien/Weitere_Serien'),
+ ]
+ items = [{
+ 'title': title,
+ 'path': path,
+ 'is_folder': True,
+ 'video_id': None,
+ } for title, path in sub_categories]
+ return items, False, False
+
+
+# Needs to be before MusicChannelScraper and VideoChannelScraper
+class ChannelScraper(BaseScraper):
+ path_matches = ('channel/', 'full_episodes', 'mv_user_branded_content_box')
+
+ def parse(self, tree):
+ for scraper in (MusicChannelScraper, VideoChannelScraper):
+ if tree.find(*scraper.subtree_props):
+ print 'Redirecting to scraper-class: %s' % scraper.__name__
+ return scraper().parse(tree)
+
+
+class VideoChannelScraper(BaseScraper):
+ rex = re.compile('chIDfull_episodes|chIDhighlight_clips')
+ subtree_props = ('div', {'class': rex}) # FIXME
+ section_props = ('div', {'class': re.compile('full_episodes')})
+ a_props = ('a', {'class': 'series_play'})
+ img_props = ('img', {'class': 'vThumb'})
+ duration_props = ('span', {'class': 'vViews'})
+ pagination_section_props = ('div', {'class': 'pViewBottom'})
+ next_page_props = ('a', {'class': 'pView pSmaller pnNext'})
+ prev_page_props = ('a', {'class': 'pView pSmaller pnBack'})
+ # FIXME: add clips
+
+
+class MusicChannelScraper(BaseScraper):
+ subtree_props = ('div', {'class': 'uBList'})
+ section_props = ('div', {'class': 'uBItem'})
+ a_props = ('a', {'class': 'uBTitle uBvTitle'})
+ img_props = ('img', {'class': re.compile('uBThumb uBvThumb')})
+ duration_props = ('span', {'class': 'vViews uBvViews'})
+ pagination_section_props = ('table', {'class': 'pView floatRight'})
+ next_page_props = ('a', {'class': 'pView pnNext'})
+ prev_page_props = ('a', {'class': 'pView pnBack'})
+ # FIXME: add clips
+
+
+class MovieScraper(BaseScraper):
+ path_matches = ('Filme/', 'filme_video_list')
+ subtree_props = ('div', {'class': 'lContent lContFoot'})
+ section_props = ('div', {'class': 'filme_entry'})
+ a_plot_props = ('a', {'class': 'vLink'})
+ div_title_props = ('div', {'class': 'lHead'})
+ img_props = ('img', {'class': 'vThumb'})
+ duration_props = ('span', {'class': 'vViews'})
+ pagination_section_props = ('div', {'class': 'pView pViewBottom'})
+ next_page_props = ('a', {'class': 'pView pnNext'})
+ prev_page_props = ('a', {'class': 'pView pnBack'})
+ # FIXME: add "filmeDetail"
+
+ def parse_item(self, section):
+ next_section = section.nextSibling
+ div_title = next_section.find(*self.div_title_props)
+ a_plot = section.find(*self.a_plot_props)
+ path = a_plot['href']
+ is_folder, video_id = self.detect_folder(path)
+ item = {
+ 'title': div_title.string,
+ 'path': path,
+ 'description': a_plot['title'],
+ 'is_folder': is_folder,
+ 'video_id': video_id,
+ 'thumb': section.find(*self.img_props)['src']
+ }
+ return item
+
+
+class MovieCategoryScraper(BaseScraper):
+ path_matches = ('Filme', )
+
+ def parse(self, tree):
+ sub_categories = [
+ ('Top Filme', 'Top_100/Top_100_Filme'),
+ ('Neuste Filme', '/Videos_A-Z?searchChannelID=369&searchChannel=Film'),
+ (' Comedy', '/Filme/Comedy'),
+ (' Action', '/Filme/Action'),
+ (' Horror', '/Filme/Horror'),
+ (' Sci-Fi', '/Filme/Sci-Fi'),
+ (' Thriller', '/Filme/Thriller'),
+ (' Drama', '/Filme/Drama'),
+ (' Western', '/Filme/Western'),
+ (' Dokumentation', '/Filme/Dokumentation'),
+ (' Konzerte', '/Filme/Konzerte'),
+ ('Alle Filme', '/Filme/Alle_Filme'),
+ ]
+ items = [{
+ 'title': title,
+ 'path': path,
+ 'is_folder': True,
+ 'video_id': None,
+ } for title, path in sub_categories]
+ return items, False, False
+
+
+# Needs to be before ArtistOverviewLetterScraper
+class ArtistOverviewScraper(BaseScraper):
+ path_matches = ('Musik_K%C3%BCnstler?lpage', )
+ subtree_props = ('div', {'class': 'lBox mLeftBox music_channels'})
+ section_props = ('div', {'class': 'body floatLeft sTLeft'})
+ a_props = ('a', {'class': 'pPrTitle'})
+ img_props = ('div', {'class': 'pChThumb pPrThumb'})
+
+ def get_img(self, section):
+ img = section.find(*self.img_props)
+ if img and img.find('img'):
+ img = img.find('img')
+ return img.get('longdesc') or img.get('src')
+ else:
+ print 'Error in get_img!'
+
+
+# Needs to be before MusicScraper
+class ArtistOverviewLetterScraper(BaseScraper):
+ path_matches = ('Musik_K%C3%Bcnstler', )
+ subtree_props = ('div', {'class': 'lContent'})
+ section_props = ('td', {'class': 'mView'})
+ a_props = ('a', {'class': 'mView pLetters'})
+
+ def parse_item(self, section):
+ path = section.a['href']
+ is_folder, video_id = self.detect_folder(path)
+ item = {
+ 'title': section.a.string,
+ 'path': path,
+ 'is_folder': is_folder,
+ }
+ return item
+
+
+class MusicScraper(BaseScraper):
+ path_matches = ('Musik/', 'music_videos')
+ pagination_section_props = ('div', {'class': 'pViewBottom'})
+ next_page_props = ('a', {'class': 'pView pSmaller pnNext'})
+ prev_page_props = ('a', {'class': 'pView pSmaller pnBack'})
+ subtree_props = ('div', {'class': 'lContent'})
+ section_props = ('div', {'class': 'floatLeft fRand'})
+ a_props = ('a', {'target': '_top'})
+ img_props = ('img', )
+ duration_props = ('span', {'class': 'vViews'})
+ author_props = ('span', {'class': 'nick'})
+ date_props = ('div', {'class': re.compile('vAdded')})
+
+
+class MusicCategoryScraper(BaseScraper):
+ path_matches = ('Musik', )
+
+ def parse(self, tree):
+ sub_categories = [
+ ('Charts', '/Top_100/Top_100_Single_Charts'),
+ ('Neue Musik Videos', '/Musik/Neue_Musik_Videos'),
+ (' Rock', '/Musik/Rock'),
+ (' Pop', '/Musik/Pop'),
+ (' Rap/R&B', '/Musik/Rap/R%26B'),
+ (' Schlager', '/Musik/Neue_Musik_Videos?searchChannelID=206&searchChannel=Schlager'),
+ (' Electro', '/Musik/Neue_Musik_Videos?searchChannelID=205&searchChannel=Electro'),
+ (' Metal', '/Musik/Neue_Musik_Videos?searchChannelID=204&searchChannel=Metal'),
+ (' RnB', '/Musik/Neue_Musik_Videos?searchChannelID=207&searchChannel=RnB'),
+ ('Musik Kuenstler', '/Musik/Musik_K%C3%Bcnstler'),
+ ]
+ items = [{
+ 'title': title,
+ 'path': path,
+ 'is_folder': True,
+ 'video_id': None,
+ } for title, path in sub_categories]
+ return items, False, False
def get_path(path):
- __log('get_path started with path: %s' % path)
- parser = None
- if 'Top_100' in path:
- parser = __parse_video_charts
- elif 'filme_video_list' in path:
- parser = __parse_movies
- elif 'video_list' in path:
- parser = __parse_channels
- elif 'mv_charts' in path:
- parser = __parse_channels
- elif 'Charts' in path: # fixme: still needed?
- parser = __parse_video_charts
- elif 'channel' in path:
- parser = __parse_channels
- elif 'playlist' in path: # fixme: needs to be rewritten
- parser = __parse_playlists
- elif 'Musik_K' in path:
- if not 'lpage' in path:
- parser = __parse_letter
- else:
- parser = __parse_music_artists
- elif 'Musik_Videos' in path:
- parser = __parse_video_default
- elif 'Musik' in path:
- parser = __parse_music
- elif 'Filme' in path:
- parser = __parse_movies
- elif 'Kategorien' in path:
- parser = __parse_categories
- elif 'Alle_Serien_A-Z' in path:
- parser = __parse_shows_overview
- elif 'Serien' in path:
- parser = __parse_shows
- elif '/archiv' in path:
- parser = __parse_webstars
- elif 'webstars' in path:
- parser = __parse_webstars_overview
- else:
- parser = __parse_video_default
- tree = __get_tree(MAIN_URL + path)
- __log('Using Parser: %s' % parser.__name__)
- return parser(tree)
+ log('get_path started with path: %s' % path)
+ scraper = BaseScraper.choose_scraper(path)
+ if not scraper:
+ raise NotImplementedError
+ log('Found matching scraper-class: %s' % scraper.__class__.__name__)
+ tree = requester.get_tree(MAIN_URL + path, needs_cookie=scraper.needs_cookie)
+ return scraper.parse(tree)
def get_video(video_id):
- __log('get_video started with video_id: %s' % video_id)
+ log('get_video started with video_id: %s' % video_id)
r_adv = re.compile('var flashvars={(.+?)}')
r_adv_p = re.compile('(.+?):\'(.+?)\',?')
r_swf = re.compile('swfobject.embedSWF\(\'(.+?)\'')
@@ -157,9 +527,9 @@ def get_video(video_id):
params = {}
encxml = ''
videopage_url = MAIN_URL + 'watch/%s/' % video_id
- html = __get_url(videopage_url, MAIN_URL)
- video['title'] = r_title.search(html).group(1)
- sec = r_adv.search(html).group(1)
+ html = requester.get_url(videopage_url, MAIN_URL)
+ video['title'] = re.search(r_title, html).group(1)
+ sec = re.search(r_adv, html).group(1)
for (a, b) in re.findall(r_adv_p, sec):
if not a == '_encxml':
params[a] = b
@@ -169,23 +539,23 @@ def get_video(video_id):
params['domain'] = 'www.myvideo.de'
xmldata_url = '%s?%s' % (encxml, urlencode(params))
if 'flash_playertype=MTV' in xmldata_url:
- __log('get_video avoiding MTV player')
+ log('get_video avoiding MTV player')
xmldata_url = (
'http://www.myvideo.de/dynamic/get_player_video_xml.php'
'?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
) % video_id
- enc_data = __get_url(xmldata_url, videopage_url).split('=')[1]
+ enc_data = requester.get_url(xmldata_url, videopage_url).split('=')[1]
enc_data_b = unhexlify(enc_data)
sk = __md5(b64decode(b64decode(GK)) + __md5(str(video_id)))
dec_data = __rc4crypt(enc_data_b, sk)
- rtmpurl = r_rtmpurl.search(dec_data).group(1)
+ rtmpurl = re.search(r_rtmpurl, dec_data).group(1)
video['rtmpurl'] = unquote(rtmpurl)
if 'myvideo2flash' in video['rtmpurl']:
- __log('get_video forcing RTMPT')
+ log('get_video forcing RTMPT')
video['rtmpurl'] = video['rtmpurl'].replace('rtmpe://', 'rtmpt://')
- playpath = r_playpath.search(dec_data).group(1)
+ playpath = re.search(r_playpath, dec_data).group(1)
video['file'] = unquote(playpath)
- m_filepath = r_path.search(dec_data)
+ m_filepath = re.search(r_path, dec_data)
video['filepath'] = m_filepath.group(1)
if not video['file'].endswith('f4m'):
ppath, prefix = unquote(playpath).split('.')
@@ -194,485 +564,57 @@ def get_video(video_id):
video['hls_playlist'] = (
video['filepath'] + video['file']
).replace('.f4m', '.m3u8')
- swfobj = r_swf.search(html).group(1)
+ swfobj = re.search(r_swf, html).group(1)
video['swfobj'] = unquote(swfobj)
video['pageurl'] = videopage_url
return video
-def __parse_video_charts(tree):
- r_div = re.compile('vThumb')
- subtree = tree.find('div', {'class': 'lContent'})
- sections = subtree.findAll('div', {'class': r_div})
- items = []
- for sec in sections:
- path = sec.a['href']
- is_folder, video_id = __detect_folder(path)
- title = sec.a['title']
- thumb = __get_thumb(sec.img)
+class SessionRequester(object):
+
+ def __init__(self):
+ self.cookie = None
+
+ def get_tree(self, url, referer=MAIN_URL, needs_cookie=False):
+ if needs_cookie and not self.cookie:
+ # do a useless request to get a cookie...
+ self.get_url(MAIN_URL)
+ html = self.get_url(url, referer)
+ html = html.decode('utf-8', 'ignore') # Fix MyVideo.de bad enc
+ tree = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)
+ return tree
+
+ def get_url(self, url, referer=MAIN_URL):
+ url = url.replace('.de//', '.de/') # FIXME
+ log('SessionRequester.get_url opening url: %s' % url)
+ request = Request(url)
+ headers = [
+ ('Accept', ('text/html,application/xhtml+xml,'
+ 'application/xml;q=0.9,*/*;q=0.8')),
+ ('User-Agent', ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
+ 'AppleWebKit/537.36 (KHTML, like Gecko) '
+ 'Chrome/27.0.1453.15 Safari/537.36')),
+ ('Accept-Encoding', 'deflate,sdch'),
+ ('Accept-Language', 'de-DE,de;q=0.8,en-US;q=0.6,en;q=0.4'),
+ ]
+ for header in headers:
+ request.add_header(*header)
+ if referer:
+ request.add_header('Referer', referer)
+ if self.cookie:
+ request.add_header('Cookie', self.cookie)
try:
- length_str = sec.span.string
- length = __format_length(length_str)
- except AttributeError:
- length = '0:00'
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'length': length,
- 'path': path,
- 'is_folder': is_folder,
- 'video_id': video_id
- })
- __log('__parse_video_charts finished with %d elements' % len(items))
- return items
-
-
-def __parse_video_default(tree):
- subtree = tree.find('div', {'class': 'lContent'})
- r_td = re.compile('hslice.*?video_list')
- items = []
- pagination = subtree.find('div', {'class': 'pView'})
- if pagination:
- prev_link = pagination.find('a', {'class': 'pView pnBack'})
- if prev_link:
- items.append({
- 'title': prev_link['title'],
- 'pagenination': 'PREV',
- 'path': prev_link['href']
- })
- next_link = pagination.find('a', {'class': 'pView pnNext'})
- if next_link:
- items.append({
- 'title': next_link['title'],
- 'pagenination': 'NEXT',
- 'path': next_link['href']
- })
- sections = subtree.findAll('div', {'class': r_td})
- for sec in sections:
- link = sec.find('a', {'class': 'vLink'})
- if not link:
- continue
- path = link['href']
- is_folder, video_id = __detect_folder(path)
- title = link['title']
- thumb = __get_thumb(link.img)
- length_str = sec.find('span', {'class': 'vViews'}).string
- length = __format_length(length_str)
- username = sec.find('span', {'class': 'nick'}).a['title']
- span = sec.find('span', {'id': 'vc%s' % video_id})
- if span:
- views = span.string.replace('.', '')
- else:
- views = 0
- date = sec.find('div', {'class': 'sCenter vAdded'}).string
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'length': length,
- 'path': path,
- 'is_folder': is_folder,
- 'username': username,
- 'views': views,
- 'date': date,
- 'video_id': video_id
- })
- __log('__parse_video_default finished with %d elements' % len(items))
- return items
-
-
-def __parse_music(tree):
- r_td = re.compile('floatLeft fRand')
- subtree = tree.find('div', {'class': 'lContent'})
- sections = subtree.findAll('div', {'class': r_td})
- items = []
- for sec in sections:
- div = sec.find('div', {'class': 'vThumb chThumb'})
- if div:
- path = div.a['href']
- is_folder, video_id = __detect_folder(path)
- title = div.a['title']
- thumb = __get_thumb(div.img)
- length_str = div.find('span', {'class': 'vViews'}).string
- length = __format_length(length_str)
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'length': length,
- 'path': path,
- 'is_folder': is_folder,
- 'video_id': video_id
- })
- __log('__parse_music finished with %d elements' % len(items))
- return items
-
-
-def __parse_categories(tree):
- r_td = re.compile('body floatLeft')
- sections = tree.findAll('div', {'class': r_td})
- items = []
- for sec in sections:
- d = sec.find('div', {'class': 'sCenter kTitle'})
- if not d:
- continue
- path = d.a['href']
- is_folder = True
- title = d.a.string
- thumb = __get_thumb(sec.find('div', {'class': 'vThumb kThumb'}).a.img)
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'path': path,
- 'is_folder': is_folder
- })
- __log('__parse_categories finished with %d elements' % len(items))
- return items
-
-
-def __parse_shows_overview(tree):
- subtree = tree.find('div', {'class': 'lContent'})
- sections = subtree.findAll('div', {'class': 'lBox seriesDetail'})
- items = []
- for sec in sections:
- prevs = sec.previousSibling
- path = prevs.a['href']
- is_folder = True
- title = prevs.a.string
- thumb = __get_thumb(sec.find(
- 'div', {'class': 'vThumb pChThumb'}).div.img
- )
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'path': path,
- 'is_folder': is_folder
- })
- __log('__parse_shows_overview finished with %d elements' % len(items))
- return items
-
-
-def __parse_webstars_overview(tree):
- subtree = tree.find('div', {'class': 'content grid_12'})
- sections = subtree.findAll('div')
- items = []
- r_archiv = re.compile('/archiv')
- for sec in sections:
- if sec.a:
- path = sec.find('a', {'href': r_archiv})['href']
- is_folder = True
- title = sec.a.img['alt']
- thumb = __get_thumb(sec.find('img'))
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'path': path,
- 'is_folder': is_folder
- })
- __log('__parse_webstars_overview finished with %d elements' % len(items))
- return items
-
-
-def __parse_webstars(tree):
- subtree = tree.find('div', {'class': 'video-list videos'})
- a_elements = subtree.findAll('a', recursive=False)
- items = []
- for a_element in a_elements:
- path = a_element['href']
- is_folder, video_id = __detect_folder(path)
- title = a_element.find('span', {'class': 'headline-sub-sub'}).string
- thumb = __get_thumb(a_element.find('img'))
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'path': path,
- 'is_folder': is_folder,
- 'video_id': video_id
- })
- pagination = tree.find('div', {'class': re.compile('video_pager')})
- if pagination:
- prev_link = pagination.find('a', text=u'\u25c4')
- if prev_link and prev_link.parent.get('href'):
- items.append({
- 'title': '',
- 'pagenination': 'PREV',
- 'path': prev_link.parent['href']
- })
- next_link = pagination.find('a', text=u'\u25ba')
- if next_link and next_link.parent.get('href'):
- items.append({
- 'title': '',
- 'pagenination': 'NEXT',
- 'path': next_link.parent['href']
- })
- __log('__parse_webstars finished with %d elements' % len(items))
- return items
-
-
-def __parse_playlists(tree):
- subtree = tree.find('div', {'class': 'globalBxBorder globalBx'})
- sections = subtree.findAll('div', {'class': 'vds_video_sidebar_item'})
- items = []
- for sec in sections:
- d = sec.find('div', {'class': 'nTitle'})
- title = d.a['title']
- path = d.a['href']
- is_folder = True
- thumb = __get_thumb(sec.find('img', {'class': 'vThumb nThumb pThumb'}))
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'path': path,
- 'is_folder': is_folder
- })
- __log('__parse_playlists finished with %d elements' % len(items))
- return items
-
-
-def __parse_channels(tree):
- r_div = re.compile('lBox floatLeft qLeftBox charts_box')
- r_td = re.compile('body floatLeft')
- subtree = tree.find('div', {'class': r_div})
- subtree2 = tree.find('div', {'class': 'uBList'})
- items = []
- if subtree: # video channel
- __log('__parse_channels assuming video channel')
- r_pagination = re.compile('pViewBottom')
- r_pagelink = re.compile('src=\'(.+?)\'')
- pagination = tree.find('div', {'class': r_pagination})
- if pagination:
- prev_link = pagination.find(
- 'a', {'class': 'pView pSmaller pnBack'}
- )
- if prev_link:
- link = r_pagelink.search(prev_link['onclick']).group(1)
- items.append({
- 'title': prev_link['title'],
- 'pagenination': 'PREV',
- 'path': link
- })
- next_link = pagination.find(
- 'a', {'class': 'pView pSmaller pnNext'}
- )
- if next_link:
- link = r_pagelink.search(next_link['onclick']).group(1)
- items.append({
- 'title': next_link['title'],
- 'pagenination': 'NEXT',
- 'path': link
- })
- sections = subtree.findAll('div', {'class': r_td})
- for sec in sections:
- d = sec.find('div', {'class': 'pChHead'})
- if d:
- title = d.a['title']
- path = d.a['href']
- is_folder, video_id = __detect_folder(path)
- length_str = sec.find('span', {'class': 'vViews'}).string
- length = __format_length(length_str)
- thumb = __get_thumb(sec.find('img', {'class': 'vThumb'}))
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'path': path,
- 'length': length,
- 'video_id': video_id,
- 'is_folder': is_folder
- })
- elif subtree2: # music channel
- __log('__parse_channels assuming music channel')
- r_pagination = re.compile('pView')
- r_pagelink = re.compile('src=\'(.+?)\'')
- pagination = tree.find('table', {'class': r_pagination})
- if pagination:
- prev_link = pagination.find('a', {'class': 'pView pnBack'})
- if prev_link:
- link = r_pagelink.search(prev_link['onclick']).group(1)
- items.append({
- 'title': prev_link['title'],
- 'pagenination': 'PREV',
- 'path': link
- })
- next_link = pagination.find('a', {'class': 'pView pnNext'})
- if next_link:
- link = r_pagelink.search(next_link['onclick']).group(1)
- items.append({
- 'title': next_link['title'],
- 'pagenination': 'NEXT',
- 'path': link
- })
- sections = subtree2.findAll('div', {'class': 'uBItem'})
- for sec in sections:
- d = sec.find('div', {'class': 'sCenter uBTitle'})
- title = d.a.string
- path = d.a['href']
- is_folder, video_id = __detect_folder(path)
- length_str = sec.find('span', {'class': 'vViews uBvViews'}).string
- length = __format_length(length_str)
- thumb = __get_thumb(sec.find('img', {'class': 'uBThumb uBvThumb'}))
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'path': path,
- 'length': length,
- 'video_id': video_id,
- 'is_folder': is_folder
- })
- __log('__parse_channels finished with %d elements' % len(items))
- return items
-
-
-def __parse_shows(tree):
- r_td = re.compile('body .*? series_member')
- subtree = tree.find('div', {'class': 'lContent'})
- items = []
- if subtree:
- sections = subtree.findAll('div', {'class': r_td})
- for sec in sections:
- d = sec.find('div', {'class': 'pChHead'})
- title = d.a.string
- path = d.a['href']
- is_folder = True
- thumb = __get_thumb(sec.find('img', {'class': 'vThumb'}))
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'path': path,
- 'is_folder': is_folder
- })
- __log('__parse_shows finished with %d elements' % len(items))
- return items
-
-
-def __parse_movies(tree):
- r_pagination = re.compile('pView')
- r_pagelink = re.compile('src=\'(.+?)\'')
- items = []
- pagination = tree.find('div', {'class': r_pagination})
- if pagination:
- prev_link = pagination.find('a', {'class': 'pView pnBack'})
- if prev_link:
- link = r_pagelink.search(prev_link['onclick']).group(1)
- items.append({
- 'title': prev_link['title'],
- 'pagenination': 'PREV',
- 'path': link
- })
- next_link = pagination.find('a', {'class': 'pView pnNext'})
- if next_link:
- link = r_pagelink.search(next_link['onclick']).group(1)
- items.append({
- 'title': next_link['title'],
- 'pagenination': 'NEXT',
- 'path': link
- })
- sections = tree.findAll('div', {'class': 'filme_entry'})
- for sec in sections:
- d = sec.find('div', {'class': 'vTitle'})
- title = d.a['title']
- path = d.a['href']
- is_folder, video_id = __detect_folder(path)
- length_str = sec.find('span', {'class': 'vViews'}).string
- length = __format_length(length_str)
- thumb = __get_thumb(sec.find('img', {'class': 'vThumb'}))
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'path': path,
- 'length': length,
- 'video_id': video_id,
- 'is_folder': is_folder
- })
- __log('__parse_movies finished with %d elements' % len(items))
- return items
-
-
-def __parse_letter(tree):
- sections = tree.findAll('td', {'class': 'mView'})
- items = []
- for sec in sections:
- title = sec.a.string.strip()
- path = sec.a['href']
- is_folder = True
- items.append({
- 'title': title,
- 'path': path,
- 'is_folder': is_folder
- })
- __log('__parse_letter finished with %d elements' % len(items))
- return items
-
-
-def __parse_music_artists(tree):
- subtree = tree.find('div', {'class': 'lBox mLeftBox music_channels'})
- items = []
- if subtree:
- sections = subtree.findAll('div', {'class': 'body floatLeft sTLeft'})
- for sec in sections:
- d = sec.find('div', {'class': 'pChThumb pPrThumb'})
- title = d.a['title']
- path = d.a['href']
- is_folder, video_id = __detect_folder(path)
- thumb = __get_thumb(d.img)
- items.append({
- 'title': title,
- 'thumb': thumb,
- 'path': path,
- 'video_id': video_id,
- 'is_folder': is_folder
- })
- __log('__parse_music_artists finished with %d elements' % len(items))
- return items
-
-
-def __format_length(length_str):
- h = m = s = '0'
- if ' min' in length_str:
- m, s = length_str.replace(' min', '').split(':')
- elif ' Std.' in length_str:
- h, m, s = length_str.replace(' Std.', '').split(':')
- seconds = int(h) * 3600 + int(m) * 60 + int(s)
- return seconds
-
-
-def __detect_folder(path):
- video_id = None
- is_folder = True
- m_id = R_ID.search(path)
- if m_id:
- video_id = m_id.group(1)
- is_folder = False
- return is_folder, video_id
-
-
-def __get_thumb(img):
- return img.get('longdesc') or img.get('src')
-
-
-def __get_tree(url, referer=None):
- html = __get_url(url, referer)
- tree = BeautifulSoup(html, convertEntities=BeautifulSoup.HTML_ENTITIES)
- return tree
-
-
-def __get_url(url, referer=None):
- __log('__get_url opening url: %s' % url)
- req = Request(url)
- if referer:
- req.add_header('Referer', referer)
- req.add_header(
- 'Accept', (
- 'text/html,application/xhtml+xml,'
- 'application/xml;q=0.9,*/*;q=0.8'
- )
- )
- req.add_header('User-Agent', UA)
- try:
- html = urlopen(req).read()
- except HTTPError, error:
- raise NetworkError('HTTPError: %s' % error)
- except URLError, error:
- raise NetworkError('URLError: %s' % error)
- __log('__get_url got %d bytes' % len(html))
- return html
+ response = urlopen(request)
+ if response.headers.get('Set-Cookie'):
+ self.cookie = response.headers.get('Set-Cookie')
+ log('SessionRequester.get_url got a cookie:%s' % self.cookie)
+ html = response.read()
+ except HTTPError, error:
+ raise NetworkError('HTTPError: %s' % error)
+ except URLError, error:
+ raise NetworkError('URLError: %s' % error)
+ log('SessionRequester.get_url got %d bytes' % len(html))
+ return html
def __rc4crypt(data, key):
@@ -696,5 +638,8 @@ def __md5(s):
return hashlib.md5(s).hexdigest()
-def __log(msg):
+def log(msg):
print('MyVideo.de scraper: %s' % msg)
+
+
+requester = SessionRequester()
-----------------------------------------------------------------------
Summary of changes:
plugin.video.myvideo_de/addon.py | 187 +++-
plugin.video.myvideo_de/addon.xml | 2 +-
plugin.video.myvideo_de/changelog.txt | 10 +
.../resources/language/English/strings.xml | 7 +
.../resources/language/German/strings.xml | 7 +
plugin.video.myvideo_de/resources/lib/scraper.py | 1089 ++++++++++----------
plugin.video.myvideo_de/resources/lib/test.py | 57 +
7 files changed, 733 insertions(+), 626 deletions(-)
create mode 100644 plugin.video.myvideo_de/resources/lib/test.py
hooks/post-receive
--
Plugins
------------------------------------------------------------------------------
Try New Relic Now & We'll Send You this Cool Shirt
New Relic is the only SaaS-based application performance monitoring service
that delivers powerful full stack analytics. Optimize and monitor your
browser, app, & servers with just a few lines of code. Try New Relic
and get this awesome Nerd Life shirt! http://p.sf.net/sfu/newrelic_d2d_apr
_______________________________________________
Xbmc-addons mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/xbmc-addons