Hello community, here is the log from the commit of package youtube-dl for openSUSE:Factory checked in at 2019-03-11 11:15:22 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old) and /work/SRC/openSUSE:Factory/.youtube-dl.new.28833 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "youtube-dl" Mon Mar 11 11:15:22 2019 rev:98 rq:683048 version:2019.03.09 Changes: -------- --- /work/SRC/openSUSE:Factory/youtube-dl/python-youtube-dl.changes 2019-03-05 12:24:30.368856355 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.28833/python-youtube-dl.changes 2019-03-11 11:15:30.785337175 +0100 @@ -1,0 +2,29 @@ +Fri Mar 8 20:36:53 UTC 2019 - Sebastien CHAVAUX <seb95passionli...@opensuse.org> + +- Update to new upstream release 2019.03.09 + * [extractor/common] Use compat_etree_Element + * [compat] Introduce compat_etree_Element + * [extractor/common] Fallback url to base URL for DASH formats + * [extractor/common] Do not fail on invalid data while parsing F4M manifest + in non fatal mode + * [extractor/common] Return MPD manifest as format's url meta field (#20242) + * [utils] Strip #HttpOnly_ prefix from cookies files (#20219) + * [francetv:site] Relax video id regular expression (#20268) + * [toutv] Detect invalid login error + * [toutv] Fix authentication (#20261) + * [urplay] Extract timestamp (#20235) + * [openload] Add support for oload.space (#20246) + * [facebook] Improve uploader extraction (#20250) + * [bbc] Use compat_etree_Element + * [crunchyroll] Use compat_etree_Element + * [npo] Improve ISM extraction + * [rai] Improve extraction (#20253) + * [paramountnetwork] Fix mgid extraction (#20241) + * [libsyn] Improve extraction (#20229) + * [youtube] Add more invidious instances to URL regular expression (#20228) + * [spankbang] Fix extraction (#20023) + * [espn] Extend URL regular expression (#20013) + * [sixplay] Handle videos with empty assets (#20016) + * [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070) + +------------------------------------------------------------------- @@ -15 +44 @@ -Sun Feb 17 22:01:42 UTC 2019 - Sebastien CHAVAUX <seb95.s...@gmail.com> +Sun Feb 17 21:50:30 UTC 2019 - Sebastien CHAVAUX <seb95.s...@gmail.com> @@ -40 +69 @@ -Tue Feb 12 19:17:32 UTC 2019 - Sebastien CHAVAUX <seb95passionli...@opensuse.org> +Tue Feb 12 19:13:12 UTC 2019 - Sebastien CHAVAUX <seb95passionli...@opensuse.org> @@ -66,0 +96,16 @@ +------------------------------------------------------------------- +Sun Jan 27 16:38:11 UTC 2019 - Sebastien CHAVAUX <seb95passionli...@opensuse.org> + +- Update to new upstream release 2019.01.27 + * [extractor/common] Extract season in _json_ld + * [postprocessor/ffmpeg] Fallback to ffmpeg/avconv for audio codec detection + (#681) + * [vice] Fix extraction for locked videos (#16248) + * [wakanim] Detect DRM protected videos + * [wakanim] Add support for wakanim.tv (#14374) + * [usatoday] Fix extraction for videos with custom brightcove partner id + (#18990) + * [drtv] Fix extraction (#18989) + * [nhk] Extend URL regular expression (#18968) + * [go] Fix Adobe Pass requests for Disney Now (#18901) + * [openload] Add support for oload.club (#18969) --- /work/SRC/openSUSE:Factory/youtube-dl/youtube-dl.changes 2019-03-05 12:24:34.248855172 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.28833/youtube-dl.changes 2019-03-11 11:15:33.493336202 +0100 @@ -1,0 +2,29 @@ +Fri Mar 8 20:36:53 UTC 2019 - Sebastien CHAVAUX <seb95passionli...@opensuse.org> + +- Update to new upstream release 2019.03.09 + * [extractor/common] Use compat_etree_Element + * [compat] Introduce compat_etree_Element + * [extractor/common] Fallback url to base URL for DASH formats + * [extractor/common] Do not fail on invalid data while parsing F4M manifest + in non fatal mode + * [extractor/common] Return MPD manifest as format's url meta field (#20242) + * [utils] Strip #HttpOnly_ prefix from cookies files (#20219) + * [francetv:site] Relax video id regular expression (#20268) + * [toutv] Detect invalid login error + * [toutv] Fix authentication (#20261) + * [urplay] Extract timestamp (#20235) + * [openload] Add support for oload.space (#20246) + * [facebook] Improve uploader extraction (#20250) + * [bbc] Use compat_etree_Element + * [crunchyroll] Use compat_etree_Element + * [npo] Improve ISM extraction + * [rai] Improve extraction (#20253) + * [paramountnetwork] Fix mgid extraction (#20241) + * [libsyn] Improve extraction (#20229) + * [youtube] Add more invidious instances to URL regular expression (#20228) + * [spankbang] Fix extraction (#20023) + * [espn] Extend URL regular expression (#20013) + * [sixplay] Handle videos with empty assets (#20016) + * [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070) + +------------------------------------------------------------------- Old: ---- youtube-dl-2019.03.01.tar.gz youtube-dl-2019.03.01.tar.gz.sig New: ---- youtube-dl-2019.03.09.tar.gz youtube-dl-2019.03.09.tar.gz.sig ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.ZNGaIc/_old 2019-03-11 11:15:35.445335501 +0100 +++ /var/tmp/diff_new_pack.ZNGaIc/_new 2019-03-11 11:15:35.445335501 +0100 @@ -19,7 +19,7 @@ %define modname youtube-dl %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-youtube-dl -Version: 2019.03.01 +Version: 2019.03.09 Release: 0 Summary: A python module for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.ZNGaIc/_old 2019-03-11 11:15:35.469335493 +0100 +++ /var/tmp/diff_new_pack.ZNGaIc/_new 2019-03-11 11:15:35.473335491 +0100 @@ -17,7 +17,7 @@ Name: youtube-dl -Version: 2019.03.01 +Version: 2019.03.09 Release: 0 Summary: A tool for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl-2019.03.01.tar.gz -> youtube-dl-2019.03.09.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/ChangeLog new/youtube-dl/ChangeLog --- old/youtube-dl/ChangeLog 2019-02-28 19:03:47.000000000 +0100 +++ new/youtube-dl/ChangeLog 2019-03-08 20:53:15.000000000 +0100 @@ -1,3 +1,34 @@ +version 2019.03.09 + +Core +* [extractor/common] Use compat_etree_Element ++ [compat] Introduce compat_etree_Element +* [extractor/common] Fallback url to base URL for DASH formats +* [extractor/common] Do not fail on invalid data while parsing F4M manifest + in non fatal mode +* [extractor/common] Return MPD manifest as format's url meta field (#20242) +* [utils] Strip #HttpOnly_ prefix from cookies files (#20219) + +Extractors +* [francetv:site] Relax video id regular expression (#20268) +* [toutv] Detect invalid login error +* [toutv] Fix authentication (#20261) ++ [urplay] Extract timestamp (#20235) ++ [openload] Add support for oload.space (#20246) +* [facebook] Improve uploader extraction (#20250) +* [bbc] Use compat_etree_Element +* [crunchyroll] Use compat_etree_Element +* [npo] Improve ISM extraction +* [rai] Improve extraction (#20253) +* [paramountnetwork] Fix mgid extraction (#20241) +* [libsyn] Improve extraction (#20229) ++ [youtube] Add more invidious instances to URL regular expression (#20228) +* [spankbang] Fix extraction (#20023) +* [espn] Extend URL regular expression (#20013) +* [sixplay] Handle videos with empty assets (#20016) ++ [vimeo] Add support for Vimeo Pro portfolio protected videos (#20070) + + version 2019.03.01 Core diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/test/test_YoutubeDLCookieJar.py new/youtube-dl/test/test_YoutubeDLCookieJar.py --- old/youtube-dl/test/test_YoutubeDLCookieJar.py 2019-02-21 19:18:59.000000000 +0100 +++ new/youtube-dl/test/test_YoutubeDLCookieJar.py 2019-03-08 20:52:53.000000000 +0100 @@ -29,6 +29,16 @@ tf.close() os.remove(tf.name) + def test_strip_httponly_prefix(self): + cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') + cookiejar.load(ignore_discard=True, ignore_expires=True) + + def assert_cookie_has_value(key): + self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') + + assert_cookie_has_value('HTTPONLY_COOKIE') + assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') + if __name__ == '__main__': unittest.main() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/test/test_compat.py new/youtube-dl/test/test_compat.py --- old/youtube-dl/test/test_compat.py 2019-02-21 19:18:59.000000000 +0100 +++ new/youtube-dl/test/test_compat.py 2019-03-08 20:52:53.000000000 +0100 @@ -13,6 +13,7 @@ from youtube_dl.compat import ( compat_getenv, compat_setenv, + compat_etree_Element, compat_etree_fromstring, compat_expanduser, compat_shlex_split, @@ -90,6 +91,12 @@ self.assertEqual(compat_shlex_split('-option "one\ntwo" \n -flag'), ['-option', 'one\ntwo', '-flag']) self.assertEqual(compat_shlex_split('-val 中文'), ['-val', '中文']) + def test_compat_etree_Element(self): + try: + compat_etree_Element.items + except AttributeError: + self.fail('compat_etree_Element is not a type') + def test_compat_etree_fromstring(self): xml = ''' <root foo="bar" spam="中文"> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/test/testdata/cookies/httponly_cookies.txt new/youtube-dl/test/testdata/cookies/httponly_cookies.txt --- old/youtube-dl/test/testdata/cookies/httponly_cookies.txt 1970-01-01 01:00:00.000000000 +0100 +++ new/youtube-dl/test/testdata/cookies/httponly_cookies.txt 2019-03-08 20:52:53.000000000 +0100 @@ -0,0 +1,6 @@ +# Netscape HTTP Cookie File +# http://curl.haxx.se/rfc/cookie_spec.html +# This is a generated file! Do not edit. + +#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE +www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE Binary files old/youtube-dl/youtube-dl and new/youtube-dl/youtube-dl differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/compat.py new/youtube-dl/youtube_dl/compat.py --- old/youtube-dl/youtube_dl/compat.py 2019-02-21 19:19:00.000000000 +0100 +++ new/youtube-dl/youtube_dl/compat.py 2019-03-08 20:52:53.000000000 +0100 @@ -2508,6 +2508,15 @@ pass +try: + # xml.etree.ElementTree.Element is a method in Python <=2.6 and + # the following will crash with: + # TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types + isinstance(None, xml.etree.ElementTree.Element) + from xml.etree.ElementTree import Element as compat_etree_Element +except TypeError: # Python <=2.6 + from xml.etree.ElementTree import _ElementInterface as compat_etree_Element + if sys.version_info[0] >= 3: def compat_etree_fromstring(text): return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder())) @@ -2969,6 +2978,7 @@ 'compat_cookiejar', 'compat_cookies', 'compat_ctypes_WINFUNCTYPE', + 'compat_etree_Element', 'compat_etree_fromstring', 'compat_etree_register_namespace', 'compat_expanduser', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/bbc.py new/youtube-dl/youtube_dl/extractor/bbc.py --- old/youtube-dl/youtube_dl/extractor/bbc.py 2019-02-21 19:19:11.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/bbc.py 2019-03-08 20:52:53.000000000 +0100 @@ -3,7 +3,6 @@ import itertools import re -import xml from .common import InfoExtractor from ..utils import ( @@ -23,6 +22,7 @@ urljoin, ) from ..compat import ( + compat_etree_Element, compat_HTTPError, compat_urlparse, ) @@ -317,7 +317,7 @@ continue captions = self._download_xml( cc_url, programme_id, 'Downloading captions', fatal=False) - if not isinstance(captions, xml.etree.ElementTree.Element): + if not isinstance(captions, compat_etree_Element): continue lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en') subtitles[lang] = [ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/common.py new/youtube-dl/youtube_dl/extractor/common.py --- old/youtube-dl/youtube_dl/extractor/common.py 2019-02-21 19:19:00.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/common.py 2019-03-08 20:52:53.000000000 +0100 @@ -17,6 +17,7 @@ from ..compat import ( compat_cookiejar, compat_cookies, + compat_etree_Element, compat_etree_fromstring, compat_getpass, compat_integer_types, @@ -102,10 +103,23 @@ from worst to best quality. Potential fields: - * url Mandatory. The URL of the video file + * url The mandatory URL representing the media: + for plain file media - HTTP URL of this file, + for RTMP - RTMP URL, + for HLS - URL of the M3U8 media playlist, + for HDS - URL of the F4M manifest, + for DASH - URL of the MPD manifest or + base URL representing the media + if MPD manifest is parsed from + a string, + for MSS - URL of the ISM manifest. * manifest_url The URL of the manifest file in case of - fragmented media (DASH, hls, hds) + fragmented media: + for HLS - URL of the M3U8 master playlist, + for HDS - URL of the F4M manifest, + for DASH - URL of the MPD manifest, + for MSS - URL of the ISM manifest. * ext Will be calculated from URL if missing * format A human-readable description of the format ("mp4 container with h264/opus"). @@ -788,7 +802,7 @@ fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return a tuple (xml as an xml.etree.ElementTree.Element, URL handle). + Return a tuple (xml as an compat_etree_Element, URL handle). See _download_webpage docstring for arguments specification. """ @@ -809,7 +823,7 @@ transform_source=None, fatal=True, encoding=None, data=None, headers={}, query={}, expected_status=None): """ - Return the xml as an xml.etree.ElementTree.Element. + Return the xml as an compat_etree_Element. See _download_webpage docstring for arguments specification. """ @@ -1454,6 +1468,9 @@ def _parse_f4m_formats(self, manifest, manifest_url, video_id, preference=None, f4m_id=None, transform_source=lambda s: fix_xml_ampersands(s).strip(), fatal=True, m3u8_id=None): + if not isinstance(manifest, compat_etree_Element) and not fatal: + return [] + # currently youtube-dl cannot decode the playerVerificationChallenge as Akamai uses Adobe Alchemy akamai_pv = manifest.find('{http://ns.adobe.com/f4m/1.0}pv-2.0') if akamai_pv is not None and ';' in akamai_pv.text: @@ -2120,7 +2137,8 @@ bandwidth = int_or_none(representation_attrib.get('bandwidth')) f = { 'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, - 'url': base_url, + # NB: mpd_url may be empty when MPD manifest is parsed from a string + 'url': mpd_url or base_url, 'manifest_url': mpd_url, 'ext': mimetype2ext(mime_type), 'width': int_or_none(representation_attrib.get('width')), diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/crunchyroll.py new/youtube-dl/youtube_dl/extractor/crunchyroll.py --- old/youtube-dl/youtube_dl/extractor/crunchyroll.py 2019-02-21 19:19:00.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/crunchyroll.py 2019-03-08 20:52:53.000000000 +0100 @@ -3,7 +3,6 @@ import re import json -import xml.etree.ElementTree as etree import zlib from hashlib import sha1 @@ -12,6 +11,7 @@ from .vrv import VRVIE from ..compat import ( compat_b64decode, + compat_etree_Element, compat_etree_fromstring, compat_urllib_parse_urlencode, compat_urllib_request, @@ -56,22 +56,11 @@ if username is None: return - self._download_webpage( - 'https://www.crunchyroll.com/?a=formhandler', - None, 'Logging in', 'Wrong login info', - data=urlencode_postdata({ - 'formname': 'RpcApiUser_Login', - 'next_url': 'https://www.crunchyroll.com/acct/membership', - 'name': username, - 'password': password, - })) - - ''' login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') def is_logged(webpage): - return '<title>Redirecting' in webpage + return 'href="/logout"' in webpage # Already logged in if is_logged(login_page): @@ -110,7 +99,6 @@ raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') - ''' def _real_initialize(self): self._login() @@ -402,7 +390,7 @@ 'Downloading subtitles for ' + sub_name, data={ 'subtitle_script_id': sub_id, }) - if not isinstance(sub_doc, etree.Element): + if not isinstance(sub_doc, compat_etree_Element): continue sid = sub_doc.get('id') iv = xpath_text(sub_doc, 'iv', 'subtitle iv') @@ -519,7 +507,7 @@ 'video_quality': stream_quality, 'current_page': url, }) - if isinstance(streamdata, etree.Element): + if isinstance(streamdata, compat_etree_Element): stream_info = streamdata.find('./{default}preload/stream_info') if stream_info is not None: stream_infos.append(stream_info) @@ -530,7 +518,7 @@ 'video_format': stream_format, 'video_encode_quality': stream_quality, }) - if isinstance(stream_info, etree.Element): + if isinstance(stream_info, compat_etree_Element): stream_infos.append(stream_info) for stream_info in stream_infos: video_encode_id = xpath_text(stream_info, './video_encode_id') @@ -605,7 +593,7 @@ season = episode = episode_number = duration = thumbnail = None - if isinstance(metadata, etree.Element): + if isinstance(metadata, compat_etree_Element): season = xpath_text(metadata, 'series_title') episode = xpath_text(metadata, 'episode_title') episode_number = int_or_none(xpath_text(metadata, 'episode_number')) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/espn.py new/youtube-dl/youtube_dl/extractor/espn.py --- old/youtube-dl/youtube_dl/extractor/espn.py 2019-02-21 19:19:01.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/espn.py 2019-03-08 20:52:53.000000000 +0100 @@ -29,7 +29,8 @@ (?: .*?\?.*?\bid=| /_/id/ - ) + )| + [^/]+/video/ ) )| (?:www\.)espnfc\.(?:com|us)/(?:video/)?[^/]+/\d+/video/ @@ -94,6 +95,9 @@ }, { 'url': 'http://www.espnfc.com/english-premier-league/23/video/3324163/premier-league-in-90-seconds-golden-tweets', 'only_matching': True, + }, { + 'url': 'http://www.espn.com/espnw/video/26066627/arkansas-gibson-completes-hr-cycle-four-innings', + 'only_matching': True, }] def _real_extract(self, url): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/facebook.py new/youtube-dl/youtube_dl/extractor/facebook.py --- old/youtube-dl/youtube_dl/extractor/facebook.py 2019-02-21 19:19:01.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/facebook.py 2019-03-08 20:52:53.000000000 +0100 @@ -424,7 +424,7 @@ uploader = clean_html(get_element_by_id( 'fbPhotoPageAuthorName', webpage)) or self._search_regex( r'ownerName\s*:\s*"([^"]+)"', webpage, 'uploader', - fatal=False) or self._og_search_title(webpage, fatal=False) + default=None) or self._og_search_title(webpage, fatal=False) timestamp = int_or_none(self._search_regex( r'<abbr[^>]+data-utime=["\'](\d+)', webpage, 'timestamp', default=None)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/francetv.py new/youtube-dl/youtube_dl/extractor/francetv.py --- old/youtube-dl/youtube_dl/extractor/francetv.py 2019-02-21 19:19:11.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/francetv.py 2019-03-08 20:52:53.000000000 +0100 @@ -215,7 +215,7 @@ _TESTS = [{ 'url': 'https://www.france.tv/france-2/13h15-le-dimanche/140921-les-mysteres-de-jesus.html', 'info_dict': { - 'id': '162311093', + 'id': 'ec217ecc-0733-48cf-ac06-af1347b849d1', 'ext': 'mp4', 'title': '13h15, le dimanche... - Les mystères de Jésus', 'description': 'md5:75efe8d4c0a8205e5904498ffe1e1a42', @@ -271,7 +271,7 @@ catalogue = None video_id = self._search_regex( - r'(?:data-main-video\s*=|videoId\s*:)\s*(["\'])(?P<id>(?:(?!\1).)+)\1', + r'(?:data-main-video\s*=|videoId["\']?\s*[:=])\s*(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'video id', default=None, group='id') if not video_id: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/libsyn.py new/youtube-dl/youtube_dl/extractor/libsyn.py --- old/youtube-dl/youtube_dl/extractor/libsyn.py 2019-02-21 19:19:01.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/libsyn.py 2019-03-08 20:52:53.000000000 +0100 @@ -1,12 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import json import re from .common import InfoExtractor from ..utils import ( + clean_html, + get_element_by_class, parse_duration, + strip_or_none, unified_strdate, ) @@ -21,7 +23,9 @@ 'id': '6385796', 'ext': 'mp3', 'title': "Champion Minded - Developing a Growth Mindset", - 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.', + # description fetched using another request: + # http://html5-player.libsyn.com/embed/getitemdetails?item_id=6385796 + # 'description': 'In this episode, Allistair talks about the importance of developing a growth mindset, not only in sports, but in life too.', 'upload_date': '20180320', 'thumbnail': 're:^https?://.*', }, @@ -38,22 +42,36 @@ }] def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('id') - url = m.group('mainurl') + url, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, video_id) - podcast_title = self._search_regex( - r'<h3>([^<]+)</h3>', webpage, 'podcast title', default=None) - if podcast_title: - podcast_title = podcast_title.strip() - episode_title = self._search_regex( - r'(?:<div class="episode-title">|<h4>)([^<]+)</', webpage, 'episode title') - if episode_title: - episode_title = episode_title.strip() + data = self._parse_json(self._search_regex( + r'var\s+playlistItem\s*=\s*({.+?});', + webpage, 'JSON data block'), video_id) + + episode_title = data.get('item_title') or get_element_by_class('episode-title', webpage) + if not episode_title: + self._search_regex( + [r'data-title="([^"]+)"', r'<title>(.+?)</title>'], + webpage, 'episode title') + episode_title = episode_title.strip() + + podcast_title = strip_or_none(clean_html(self._search_regex( + r'<h3>([^<]+)</h3>', webpage, 'podcast title', + default=None) or get_element_by_class('podcast-title', webpage))) title = '%s - %s' % (podcast_title, episode_title) if podcast_title else episode_title + formats = [] + for k, format_id in (('media_url_libsyn', 'libsyn'), ('media_url', 'main'), ('download_link', 'download')): + f_url = data.get(k) + if not f_url: + continue + formats.append({ + 'url': f_url, + 'format_id': format_id, + }) + description = self._html_search_regex( r'<p\s+id="info_text_body">(.+?)</p>', webpage, 'description', default=None) @@ -61,27 +79,15 @@ # Strip non-breaking and normal spaces description = description.replace('\u00A0', ' ').strip() release_date = unified_strdate(self._search_regex( - r'<div class="release_date">Released: ([^<]+)<', webpage, 'release date', fatal=False)) - - data_json = self._search_regex(r'var\s+playlistItem\s*=\s*(\{.*?\});\n', webpage, 'JSON data block') - data = json.loads(data_json) - - formats = [{ - 'url': data['media_url'], - 'format_id': 'main', - }, { - 'url': data['media_url_libsyn'], - 'format_id': 'libsyn', - }] - thumbnail = data.get('thumbnail_url') - duration = parse_duration(data.get('duration')) + r'<div class="release_date">Released: ([^<]+)<', + webpage, 'release date', default=None) or data.get('release_date')) return { 'id': video_id, 'title': title, 'description': description, - 'thumbnail': thumbnail, + 'thumbnail': data.get('thumbnail_url'), 'upload_date': release_date, - 'duration': duration, + 'duration': parse_duration(data.get('duration')), 'formats': formats, } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/npo.py new/youtube-dl/youtube_dl/extractor/npo.py --- old/youtube-dl/youtube_dl/extractor/npo.py 2019-02-21 19:19:11.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/npo.py 2019-03-08 20:52:53.000000000 +0100 @@ -238,7 +238,7 @@ formats.extend(self._extract_m3u8_formats( stream_url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) - elif '.ism/Manifest' in stream_url: + elif re.search(r'\.isml?/Manifest', stream_url): formats.extend(self._extract_ism_formats( stream_url, video_id, ism_id='mss', fatal=False)) else: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/openload.py new/youtube-dl/youtube_dl/extractor/openload.py --- old/youtube-dl/youtube_dl/extractor/openload.py 2019-02-21 19:19:02.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/openload.py 2019-03-08 20:52:53.000000000 +0100 @@ -249,7 +249,7 @@ (?:www\.)? (?: openload\.(?:co|io|link|pw)| - oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live) + oload\.(?:tv|stream|site|xyz|win|download|cloud|cc|icu|fun|club|info|pw|live|space) ) )/ (?:f|embed)/ @@ -349,6 +349,9 @@ }, { 'url': 'https://oload.live/f/-Z58UZ-GR4M', 'only_matching': True, + }, { + 'url': 'https://oload.space/f/IY4eZSst3u8/', + 'only_matching': True, }] _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/rai.py new/youtube-dl/youtube_dl/extractor/rai.py --- old/youtube-dl/youtube_dl/extractor/rai.py 2019-02-21 19:19:02.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/rai.py 2019-03-08 20:52:53.000000000 +0100 @@ -74,11 +74,11 @@ if (ext == 'm3u8' and platform != 'mon') or (ext == 'f4m' and platform != 'flash'): continue - if ext == 'm3u8': + if ext == 'm3u8' or 'format=m3u8' in media_url or platform == 'mon': formats.extend(self._extract_m3u8_formats( media_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) - elif ext == 'f4m': + elif ext == 'f4m' or platform == 'flash': manifest_url = update_url_query( media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'), {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'}) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/sixplay.py new/youtube-dl/youtube_dl/extractor/sixplay.py --- old/youtube-dl/youtube_dl/extractor/sixplay.py 2019-02-21 19:19:02.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/sixplay.py 2019-03-08 20:52:53.000000000 +0100 @@ -61,7 +61,8 @@ quality_key = qualities(['lq', 'sd', 'hq', 'hd']) formats = [] subtitles = {} - for asset in clip_data['assets']: + assets = clip_data.get('assets') or [] + for asset in assets: asset_url = asset.get('full_physical_path') protocol = asset.get('protocol') if not asset_url or protocol == 'primetime' or asset.get('type') == 'usp_hlsfp_h264' or asset_url in urls: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/spankbang.py new/youtube-dl/youtube_dl/extractor/spankbang.py --- old/youtube-dl/youtube_dl/extractor/spankbang.py 2019-02-21 19:19:02.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/spankbang.py 2019-03-08 20:52:53.000000000 +0100 @@ -9,6 +9,8 @@ parse_duration, parse_resolution, str_to_int, + url_or_none, + urlencode_postdata, ) @@ -64,16 +66,49 @@ 'Video %s is not available' % video_id, expected=True) formats = [] - for mobj in re.finditer( - r'stream_url_(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2', - webpage): - format_id, format_url = mobj.group('id', 'url') + + def extract_format(format_id, format_url): + f_url = url_or_none(format_url) + if not f_url: + return f = parse_resolution(format_id) f.update({ - 'url': format_url, + 'url': f_url, 'format_id': format_id, }) formats.append(f) + + STREAM_URL_PREFIX = 'stream_url_' + + for mobj in re.finditer( + r'%s(?P<id>[^\s=]+)\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2' + % STREAM_URL_PREFIX, webpage): + extract_format(mobj.group('id', 'url')) + + if not formats: + stream_key = self._search_regex( + r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', + webpage, 'stream key', group='value') + + sb_csrf_session = self._get_cookies( + 'https://spankbang.com')['sb_csrf_session'].value + + stream = self._download_json( + 'https://spankbang.com/api/videos/stream', video_id, + 'Downloading stream JSON', data=urlencode_postdata({ + 'id': stream_key, + 'data': 0, + 'sb_csrf_session': sb_csrf_session, + }), headers={ + 'Referer': url, + 'X-CSRFToken': sb_csrf_session, + }) + + for format_id, format_url in stream.items(): + if format_id.startswith(STREAM_URL_PREFIX): + extract_format( + format_id[len(STREAM_URL_PREFIX):], format_url) + self._sort_formats(formats) title = self._html_search_regex( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/spike.py new/youtube-dl/youtube_dl/extractor/spike.py --- old/youtube-dl/youtube_dl/extractor/spike.py 2019-02-21 19:19:02.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/spike.py 2019-03-08 20:52:53.000000000 +0100 @@ -46,8 +46,12 @@ _GEO_COUNTRIES = ['US'] def _extract_mgid(self, webpage): - cs = self._parse_json(self._search_regex( + root_data = self._parse_json(self._search_regex( r'window\.__DATA__\s*=\s*({.+})', - webpage, 'data'), None)['children'] - c = next(c for c in cs if c.get('type') == 'VideoPlayer') + webpage, 'data'), None) + + def find_sub_data(data, data_type): + return next(c for c in data['children'] if c.get('type') == data_type) + + c = find_sub_data(find_sub_data(root_data, 'MainContainer'), 'VideoPlayer') return c['props']['media']['video']['config']['uri'] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/toutv.py new/youtube-dl/youtube_dl/extractor/toutv.py --- old/youtube-dl/youtube_dl/extractor/toutv.py 2019-02-21 19:19:02.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/toutv.py 2019-03-08 20:52:53.000000000 +0100 @@ -1,14 +1,14 @@ # coding: utf-8 from __future__ import unicode_literals -import re +import json from .radiocanada import RadioCanadaIE +from ..compat import compat_HTTPError from ..utils import ( - extract_attributes, + ExtractorError, int_or_none, merge_dicts, - urlencode_postdata, ) @@ -38,47 +38,30 @@ 'url': 'https://ici.tou.tv/l-age-adulte/S01C501', 'only_matching': True, }] + _CLIENT_KEY = '4dd36440-09d5-4468-8923-b6d91174ad36' def _real_initialize(self): email, password = self._get_login_info() if email is None: return - login_webpage = self._download_webpage( - 'https://services.radio-canada.ca/auth/oauth/v2/authorize', - None, 'Downloading login page', query={ - 'client_id': '4dd36440-09d5-4468-8923-b6d91174ad36', - 'redirect_uri': 'https://ici.tou.tv/logincallback', - 'response_type': 'token', - 'scope': 'id.write media-validation.read', - 'state': '/', - }) - - def extract_form_url_and_data(wp, default_form_url, form_spec_re=''): - form, form_elem = re.search( - r'(?s)((<form[^>]+?%s[^>]*?>).+?</form>)' % form_spec_re, wp).groups() - form_data = self._hidden_inputs(form) - form_url = extract_attributes(form_elem).get('action') or default_form_url - return form_url, form_data - - post_url, form_data = extract_form_url_and_data( - login_webpage, - 'https://services.radio-canada.ca/auth/oauth/v2/authorize/login', - r'(?:id|name)="Form-login"') - form_data.update({ - 'login-email': email, - 'login-password': password, - }) - consent_webpage = self._download_webpage( - post_url, None, 'Logging in', data=urlencode_postdata(form_data)) - post_url, form_data = extract_form_url_and_data( - consent_webpage, - 'https://services.radio-canada.ca/auth/oauth/v2/authorize/consent') - _, urlh = self._download_webpage_handle( - post_url, None, 'Following Redirection', - data=urlencode_postdata(form_data)) - self._access_token = self._search_regex( - r'access_token=([\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})', - urlh.geturl(), 'access token') + try: + self._access_token = self._download_json( + 'https://services.radio-canada.ca/toutv/profiling/accounts/login', + None, 'Logging in', data=json.dumps({ + 'ClientId': self._CLIENT_KEY, + 'ClientSecret': '34026772-244b-49b6-8b06-317b30ac9a20', + 'Email': email, + 'Password': password, + 'Scope': 'id.write media-validation.read', + }).encode(), headers={ + 'Authorization': 'client-key ' + self._CLIENT_KEY, + 'Content-Type': 'application/json;charset=utf-8', + })['access_token'] + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + error = self._parse_json(e.cause.read().decode(), None)['Message'] + raise ExtractorError(error, expected=True) + raise self._claims = self._call_api('validation/v2/getClaims')['claims'] def _real_extract(self, url): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/urplay.py new/youtube-dl/youtube_dl/extractor/urplay.py --- old/youtube-dl/youtube_dl/extractor/urplay.py 2019-02-21 19:19:02.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/urplay.py 2019-03-08 20:52:53.000000000 +0100 @@ -2,18 +2,31 @@ from __future__ import unicode_literals from .common import InfoExtractor +from ..utils import unified_timestamp class URPlayIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ur(?:play|skola)\.se/(?:program|Produkter)/(?P<id>[0-9]+)' _TESTS = [{ - 'url': 'http://urplay.se/program/190031-tripp-trapp-trad-sovkudde', - 'md5': 'ad5f0de86f16ca4c8062cd103959a9eb', + 'url': 'https://urplay.se/program/203704-ur-samtiden-livet-universum-och-rymdens-markliga-musik-om-vetenskap-kritiskt-tankande-och-motstand', + 'md5': 'ff5b0c89928f8083c74bbd5099c9292d', + 'info_dict': { + 'id': '203704', + 'ext': 'mp4', + 'title': 'UR Samtiden - Livet, universum och rymdens märkliga musik : Om vetenskap, kritiskt tänkande och motstånd', + 'description': 'md5:5344508a52aa78c1ced6c1b8b9e44e9a', + 'timestamp': 1513512768, + 'upload_date': '20171217', + }, + }, { + 'url': 'https://urskola.se/Produkter/190031-Tripp-Trapp-Trad-Sovkudde', 'info_dict': { 'id': '190031', 'ext': 'mp4', 'title': 'Tripp, Trapp, Träd : Sovkudde', 'description': 'md5:b86bffdae04a7e9379d1d7e5947df1d1', + 'timestamp': 1440093600, + 'upload_date': '20150820', }, }, { 'url': 'http://urskola.se/Produkter/155794-Smasagor-meankieli-Grodan-i-vida-varlden', @@ -51,6 +64,7 @@ 'title': urplayer_data['title'], 'description': self._og_search_description(webpage), 'thumbnail': urplayer_data.get('image'), + 'timestamp': unified_timestamp(self._html_search_meta(('uploadDate', 'schema:uploadDate'), webpage, 'timestamp')), 'series': urplayer_data.get('series_title'), 'subtitles': subtitles, 'formats': formats, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/vimeo.py new/youtube-dl/youtube_dl/extractor/vimeo.py --- old/youtube-dl/youtube_dl/extractor/vimeo.py 2019-02-21 19:19:03.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/vimeo.py 2019-03-08 20:52:53.000000000 +0100 @@ -502,7 +502,11 @@ mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') orig_url = url - if mobj.group('pro') or mobj.group('player'): + if mobj.group('pro'): + # some videos require portfolio_id to be present in player url + # https://github.com/rg3/youtube-dl/issues/20070 + url = self._extract_url(url, self._download_webpage(url, video_id)) + elif mobj.group('player'): url = 'https://player.vimeo.com/video/' + video_id elif any(p in url for p in ('play_redirect_hls', 'moogaloop.swf')): url = 'https://vimeo.com/' + video_id diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/youtube.py new/youtube-dl/youtube_dl/extractor/youtube.py --- old/youtube-dl/youtube_dl/extractor/youtube.py 2019-02-21 19:19:03.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/youtube.py 2019-03-08 20:52:53.000000000 +0100 @@ -352,6 +352,9 @@ (?:www\.)?yourepeat\.com/| tube\.majestyc\.net/| (?:www\.)?invidio\.us/| + (?:www\.)?invidious\.snopyta\.org/| + (?:www\.)?invidious\.kabi\.tk/| + (?:www\.)?vid\.wxzm\.sx/| youtube\.googleapis\.com/) # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/utils.py new/youtube-dl/youtube_dl/utils.py --- old/youtube-dl/youtube_dl/utils.py 2019-02-21 19:19:03.000000000 +0100 +++ new/youtube-dl/youtube_dl/utils.py 2019-03-08 20:52:53.000000000 +0100 @@ -1141,6 +1141,8 @@ class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar): + _HTTPONLY_PREFIX = '#HttpOnly_' + def save(self, filename=None, ignore_discard=False, ignore_expires=False): # Store session cookies with `expires` set to 0 instead of an empty # string @@ -1150,7 +1152,21 @@ compat_cookiejar.MozillaCookieJar.save(self, filename, ignore_discard, ignore_expires) def load(self, filename=None, ignore_discard=False, ignore_expires=False): - compat_cookiejar.MozillaCookieJar.load(self, filename, ignore_discard, ignore_expires) + """Load cookies from a file.""" + if filename is None: + if self.filename is not None: + filename = self.filename + else: + raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT) + + cf = io.StringIO() + with open(filename) as f: + for line in f: + if line.startswith(self._HTTPONLY_PREFIX): + line = line[len(self._HTTPONLY_PREFIX):] + cf.write(compat_str(line)) + cf.seek(0) + self._really_load(cf, filename, ignore_discard, ignore_expires) # Session cookies are denoted by either `expires` field set to # an empty string or 0. MozillaCookieJar only recognizes the former # (see [1]). So we need force the latter to be recognized as session diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/version.py new/youtube-dl/youtube_dl/version.py --- old/youtube-dl/youtube_dl/version.py 2019-02-28 19:03:47.000000000 +0100 +++ new/youtube-dl/youtube_dl/version.py 2019-03-08 20:53:15.000000000 +0100 @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.03.01' +__version__ = '2019.03.09'