Hello community, here is the log from the commit of package youtube-dl for openSUSE:Factory checked in at 2020-03-04 09:44:54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old) and /work/SRC/openSUSE:Factory/.youtube-dl.new.26092 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "youtube-dl" Wed Mar 4 09:44:54 2020 rev:127 rq:781201 version:2020.03.01 Changes: -------- --- /work/SRC/openSUSE:Factory/youtube-dl/python-youtube-dl.changes 2020-02-18 13:29:29.312712217 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.26092/python-youtube-dl.changes 2020-03-04 09:45:31.542111922 +0100 @@ -1,0 +2,24 @@ +Mon Mar 2 09:04:45 UTC 2020 - Martin Rey <[email protected]> + +- Update to release 2020.03.01 + * Core + * [YoutubeDL] Force redirect URL to unicode on python 2 + * [options] Remove duplicate short option -v for --version + (#24162) + * Extractors + * [xhamster] Fix extraction (#24205) + * [franceculture] Fix extraction (#24204) + * [telecinco] Add support for article opening videos + * [telecinco] Fix extraction (#24195) + * [xtube] Fix metadata extraction (#21073, #22455) + * [youjizz] Fix extraction (#24181) + * Remove no longer needed compat_str around geturl + * [pornhd] Fix extraction (#24128) + * [teachable] Add support for multiple videos per lecture + (#24101) + * [wistia] Add support for multiple generic embeds + (#8347, 11385) + * [imdb] Fix extraction (#23443) + * [tv2dk:bornholm:play] Fix extraction (#24076) + +------------------------------------------------------------------- youtube-dl.changes: same change Old: ---- youtube-dl-2020.02.16.tar.gz youtube-dl-2020.02.16.tar.gz.sig New: ---- youtube-dl-2020.03.01.tar.gz youtube-dl-2020.03.01.tar.gz.sig ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.00LCgs/_old 2020-03-04 09:45:48.466122066 +0100 +++ /var/tmp/diff_new_pack.00LCgs/_new 2020-03-04 09:45:48.486122078 +0100 @@ -19,7 +19,7 @@ %define modname youtube-dl %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-youtube-dl -Version: 2020.02.16 +Version: 2020.03.01 Release: 0 Summary: A Python module for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.00LCgs/_old 2020-03-04 09:45:48.518122098 +0100 +++ /var/tmp/diff_new_pack.00LCgs/_new 2020-03-04 09:45:48.522122100 +0100 @@ -17,7 +17,7 @@ Name: youtube-dl -Version: 2020.02.16 +Version: 2020.03.01 Release: 0 Summary: A tool for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl-2020.02.16.tar.gz -> youtube-dl-2020.03.01.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/ChangeLog new/youtube-dl/ChangeLog --- old/youtube-dl/ChangeLog 2020-02-16 16:43:38.000000000 +0100 +++ new/youtube-dl/ChangeLog 2020-03-01 14:11:29.000000000 +0100 @@ -1,3 +1,24 @@ +version 2020.03.01 + +Core +* [YoutubeDL] Force redirect URL to unicode on python 2 +- [options] Remove duplicate short option -v for --version (#24162) + +Extractors +* [xhamster] Fix extraction (#24205) +* [franceculture] Fix extraction (#24204) ++ [telecinco] Add support for article opening videos +* [telecinco] Fix extraction (#24195) +* [xtube] Fix metadata extraction (#21073, #22455) +* [youjizz] Fix extraction (#24181) +- Remove no longer needed compat_str around geturl +* [pornhd] Fix extraction (#24128) ++ [teachable] Add support for multiple videos per lecture (#24101) ++ [wistia] Add support for multiple generic embeds (#8347, 11385) +* [imdb] Fix extraction (#23443) +* [tv2dk:bornholm:play] Fix extraction (#24076) + + version 2020.02.16 Core diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/test/test_subtitles.py new/youtube-dl/test/test_subtitles.py --- old/youtube-dl/test/test_subtitles.py 2020-02-16 16:41:17.000000000 +0100 +++ new/youtube-dl/test/test_subtitles.py 2020-03-01 14:10:40.000000000 +0100 @@ -26,7 +26,6 @@ ThePlatformIE, ThePlatformFeedIE, RTVEALaCartaIE, - FunnyOrDieIE, DemocracynowIE, ) @@ -322,18 +321,6 @@ self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca') -class TestFunnyOrDieSubtitles(BaseTestSubtitles): - url = 'http://www.funnyordie.com/videos/224829ff6d/judd-apatow-will-direct-your-vine' - IE = FunnyOrDieIE - - def test_allsubtitles(self): - self.DL.params['writesubtitles'] = True - self.DL.params['allsubtitles'] = True - subtitles = self.getSubtitles() - self.assertEqual(set(subtitles.keys()), set(['en'])) - self.assertEqual(md5(subtitles['en']), 'c5593c193eacd353596c11c2d4f9ecc4') - - class TestDemocracynowSubtitles(BaseTestSubtitles): url = 'http://www.democracynow.org/shows/2015/7/3' IE = DemocracynowIE Binary files old/youtube-dl/youtube-dl and new/youtube-dl/youtube-dl differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/YoutubeDL.py new/youtube-dl/youtube_dl/YoutubeDL.py --- old/youtube-dl/youtube_dl/YoutubeDL.py 2020-02-16 16:41:28.000000000 +0100 +++ new/youtube-dl/youtube_dl/YoutubeDL.py 2020-03-01 14:10:40.000000000 +0100 @@ -92,6 +92,7 @@ YoutubeDLCookieJar, YoutubeDLCookieProcessor, YoutubeDLHandler, + YoutubeDLRedirectHandler, ) from .cache import Cache from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER @@ -2343,6 +2344,7 @@ debuglevel = 1 if self.params.get('debug_printtraffic') else 0 https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel) ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel) + redirect_handler = YoutubeDLRedirectHandler() data_handler = compat_urllib_request_DataHandler() # When passing our own FileHandler instance, build_opener won't add the @@ -2356,7 +2358,7 @@ file_handler.file_open = file_open opener = compat_urllib_request.build_opener( - proxy_handler, https_handler, cookie_processor, ydlh, data_handler, file_handler) + proxy_handler, https_handler, cookie_processor, ydlh, redirect_handler, data_handler, file_handler) # Delete the default user-agent header, which would otherwise apply in # cases where our custom HTTP handler doesn't come into play diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/eporner.py new/youtube-dl/youtube_dl/extractor/eporner.py --- old/youtube-dl/youtube_dl/extractor/eporner.py 2020-02-16 16:41:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/eporner.py 2020-03-01 14:10:40.000000000 +0100 @@ -4,7 +4,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( encode_base_n, ExtractorError, @@ -55,7 +54,7 @@ webpage, urlh = self._download_webpage_handle(url, display_id) - video_id = self._match_id(compat_str(urlh.geturl())) + video_id = self._match_id(urlh.geturl()) hash = self._search_regex( r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/franceculture.py new/youtube-dl/youtube_dl/extractor/franceculture.py --- old/youtube-dl/youtube_dl/extractor/franceculture.py 2020-02-16 16:41:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/franceculture.py 2020-03-01 14:10:40.000000000 +0100 @@ -31,7 +31,13 @@ webpage = self._download_webpage(url, display_id) video_data = extract_attributes(self._search_regex( - r'(?s)<div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*>.*?(<button[^>]+data-asset-source="[^"]+"[^>]+>)', + r'''(?sx) + (?: + </h1>| + <div[^>]+class="[^"]*?(?:title-zone-diffusion|heading-zone-(?:wrapper|player-button))[^"]*?"[^>]*> + ).*? + (<button[^>]+data-asset-source="[^"]+"[^>]+>) + ''', webpage, 'video data')) video_url = video_data['data-asset-source'] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/generic.py new/youtube-dl/youtube_dl/extractor/generic.py --- old/youtube-dl/youtube_dl/extractor/generic.py 2020-02-16 16:41:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/generic.py 2020-03-01 14:10:40.000000000 +0100 @@ -2287,7 +2287,7 @@ if head_response is not False: # Check for redirect - new_url = compat_str(head_response.geturl()) + new_url = head_response.geturl() if url != new_url: self.report_following_redirect(new_url) if force_videoid: @@ -2387,12 +2387,12 @@ return self.playlist_result( self._parse_xspf( doc, video_id, xspf_url=url, - xspf_base_url=compat_str(full_response.geturl())), + xspf_base_url=full_response.geturl()), video_id) elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag): info_dict['formats'] = self._parse_mpd_formats( doc, - mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0], + mpd_base_url=full_response.geturl().rpartition('/')[0], mpd_url=url) self._sort_formats(info_dict['formats']) return info_dict @@ -2537,14 +2537,15 @@ dailymail_urls, video_id, video_title, ie=DailyMailIE.ie_key()) # Look for embedded Wistia player - wistia_url = WistiaIE._extract_url(webpage) - if wistia_url: - return { - '_type': 'url_transparent', - 'url': self._proto_relative_url(wistia_url), - 'ie_key': WistiaIE.ie_key(), - 'uploader': video_uploader, - } + wistia_urls = WistiaIE._extract_urls(webpage) + if wistia_urls: + playlist = self.playlist_from_matches(wistia_urls, video_id, video_title, ie=WistiaIE.ie_key()) + for entry in playlist['entries']: + entry.update({ + '_type': 'url_transparent', + 'uploader': video_uploader, + }) + return playlist # Look for SVT player svt_url = SVTIE._extract_url(webpage) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/imdb.py new/youtube-dl/youtube_dl/extractor/imdb.py --- old/youtube-dl/youtube_dl/extractor/imdb.py 2020-02-16 16:41:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/imdb.py 2020-03-01 14:10:40.000000000 +0100 @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import base64 +import json import re from .common import InfoExtractor @@ -8,6 +10,7 @@ mimetype2ext, parse_duration, qualities, + try_get, url_or_none, ) @@ -15,15 +18,16 @@ class ImdbIE(InfoExtractor): IE_NAME = 'imdb' IE_DESC = 'Internet Movie Database trailers' - _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).+?[/-]vi(?P<id>\d+)' + _VALID_URL = r'https?://(?:www|m)\.imdb\.com/(?:video|title|list).*?[/-]vi(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.imdb.com/video/imdb/vi2524815897', 'info_dict': { 'id': '2524815897', 'ext': 'mp4', - 'title': 'No. 2 from Ice Age: Continental Drift (2012)', + 'title': 'No. 2', 'description': 'md5:87bd0bdc61e351f21f20d2d7441cb4e7', + 'duration': 152, } }, { 'url': 'http://www.imdb.com/video/_/vi2524815897', @@ -47,21 +51,23 @@ def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - 'https://www.imdb.com/videoplayer/vi' + video_id, video_id) - video_metadata = self._parse_json(self._search_regex( - r'window\.IMDbReactInitialState\.push\(({.+?})\);', webpage, - 'video metadata'), video_id)['videos']['videoMetadata']['vi' + video_id] - title = self._html_search_meta( - ['og:title', 'twitter:title'], webpage) or self._html_search_regex( - r'<title>(.+?)</title>', webpage, 'title', fatal=False) or video_metadata['title'] + + data = self._download_json( + 'https://www.imdb.com/ve/data/VIDEO_PLAYBACK_DATA', video_id, + query={ + 'key': base64.b64encode(json.dumps({ + 'type': 'VIDEO_PLAYER', + 'subType': 'FORCE_LEGACY', + 'id': 'vi%s' % video_id, + }).encode()).decode(), + })[0] quality = qualities(('SD', '480p', '720p', '1080p')) formats = [] - for encoding in video_metadata.get('encodings', []): + for encoding in data['videoLegacyEncodings']: if not encoding or not isinstance(encoding, dict): continue - video_url = url_or_none(encoding.get('videoUrl')) + video_url = url_or_none(encoding.get('url')) if not video_url: continue ext = mimetype2ext(encoding.get( @@ -69,7 +75,7 @@ if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( video_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) + preference=1, m3u8_id='hls', fatal=False)) continue format_id = encoding.get('definition') formats.append({ @@ -80,13 +86,33 @@ }) self._sort_formats(formats) + webpage = self._download_webpage( + 'https://www.imdb.com/video/vi' + video_id, video_id) + video_metadata = self._parse_json(self._search_regex( + r'args\.push\(\s*({.+?})\s*\)\s*;', webpage, + 'video metadata'), video_id) + + video_info = video_metadata.get('VIDEO_INFO') + if video_info and isinstance(video_info, dict): + info = try_get( + video_info, lambda x: x[list(video_info.keys())[0]][0], dict) + else: + info = {} + + title = self._html_search_meta( + ['og:title', 'twitter:title'], webpage) or self._html_search_regex( + r'<title>(.+?)</title>', webpage, 'title', + default=None) or info['videoTitle'] + return { 'id': video_id, 'title': title, + 'alt_title': info.get('videoSubTitle'), 'formats': formats, - 'description': video_metadata.get('description'), - 'thumbnail': video_metadata.get('slate', {}).get('url'), - 'duration': parse_duration(video_metadata.get('duration')), + 'description': info.get('videoDescription'), + 'thumbnail': url_or_none(try_get( + video_metadata, lambda x: x['videoSlate']['source'])), + 'duration': parse_duration(info.get('videoRuntime')), } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/lecturio.py new/youtube-dl/youtube_dl/extractor/lecturio.py --- old/youtube-dl/youtube_dl/extractor/lecturio.py 2020-02-16 16:41:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/lecturio.py 2020-03-01 14:10:40.000000000 +0100 @@ -4,7 +4,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( clean_html, determine_ext, @@ -36,7 +35,7 @@ self._LOGIN_URL, None, 'Downloading login popup') def is_logged(url_handle): - return self._LOGIN_URL not in compat_str(url_handle.geturl()) + return self._LOGIN_URL not in url_handle.geturl() # Already logged in if is_logged(urlh): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/linuxacademy.py new/youtube-dl/youtube_dl/extractor/linuxacademy.py --- old/youtube-dl/youtube_dl/extractor/linuxacademy.py 2020-02-16 16:41:19.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/linuxacademy.py 2020-03-01 14:10:40.000000000 +0100 @@ -8,7 +8,6 @@ from ..compat import ( compat_b64decode, compat_HTTPError, - compat_str, ) from ..utils import ( ExtractorError, @@ -99,7 +98,7 @@ 'sso': 'true', }) - login_state_url = compat_str(urlh.geturl()) + login_state_url = urlh.geturl() try: login_page = self._download_webpage( @@ -129,7 +128,7 @@ }) access_token = self._search_regex( - r'access_token=([^=&]+)', compat_str(urlh.geturl()), + r'access_token=([^=&]+)', urlh.geturl(), 'access token') self._download_webpage( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/mediaset.py new/youtube-dl/youtube_dl/extractor/mediaset.py --- old/youtube-dl/youtube_dl/extractor/mediaset.py 2020-02-16 16:41:19.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/mediaset.py 2020-03-01 14:10:40.000000000 +0100 @@ -6,7 +6,6 @@ from .theplatform import ThePlatformBaseIE from ..compat import ( compat_parse_qs, - compat_str, compat_urllib_parse_urlparse, ) from ..utils import ( @@ -114,7 +113,7 @@ continue urlh = ie._request_webpage( embed_url, video_id, note='Following embed URL redirect') - embed_url = compat_str(urlh.geturl()) + embed_url = urlh.geturl() program_guid = _program_guid(_qs(embed_url)) if program_guid: entries.append(embed_url) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/mediasite.py new/youtube-dl/youtube_dl/extractor/mediasite.py --- old/youtube-dl/youtube_dl/extractor/mediasite.py 2020-02-16 16:41:19.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/mediasite.py 2020-03-01 14:10:40.000000000 +0100 @@ -129,7 +129,7 @@ query = mobj.group('query') webpage, urlh = self._download_webpage_handle(url, resource_id) # XXX: add UrlReferrer? - redirect_url = compat_str(urlh.geturl()) + redirect_url = urlh.geturl() # XXX: might have also extracted UrlReferrer and QueryString from the html service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/platzi.py new/youtube-dl/youtube_dl/extractor/platzi.py --- old/youtube-dl/youtube_dl/extractor/platzi.py 2020-02-16 16:41:19.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/platzi.py 2020-03-01 14:10:40.000000000 +0100 @@ -46,7 +46,7 @@ headers={'Referer': self._LOGIN_URL}) # login succeeded - if 'platzi.com/login' not in compat_str(urlh.geturl()): + if 'platzi.com/login' not in urlh.geturl(): return login_error = self._webpage_read_content( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/pornhd.py new/youtube-dl/youtube_dl/extractor/pornhd.py --- old/youtube-dl/youtube_dl/extractor/pornhd.py 2020-02-16 16:41:19.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/pornhd.py 2020-03-01 14:10:40.000000000 +0100 @@ -8,6 +8,7 @@ ExtractorError, int_or_none, js_to_json, + merge_dicts, urljoin, ) @@ -27,23 +28,22 @@ 'view_count': int, 'like_count': int, 'age_limit': 18, - } + }, + 'skip': 'HTTP Error 404: Not Found', }, { - # removed video 'url': 'http://www.pornhd.com/videos/1962/sierra-day-gets-his-cum-all-over-herself-hd-porn-video', - 'md5': '956b8ca569f7f4d8ec563e2c41598441', + 'md5': '1b7b3a40b9d65a8e5b25f7ab9ee6d6de', 'info_dict': { 'id': '1962', 'display_id': 'sierra-day-gets-his-cum-all-over-herself-hd-porn-video', 'ext': 'mp4', - 'title': 'Sierra loves doing laundry', + 'title': 'md5:98c6f8b2d9c229d0f0fde47f61a1a759', 'description': 'md5:8ff0523848ac2b8f9b065ba781ccf294', 'thumbnail': r're:^https?://.*\.jpg', 'view_count': int, 'like_count': int, 'age_limit': 18, }, - 'skip': 'Not available anymore', }] def _real_extract(self, url): @@ -61,7 +61,13 @@ r"(?s)sources'?\s*[:=]\s*(\{.+?\})", webpage, 'sources', default='{}')), video_id) + info = {} if not sources: + entries = self._parse_html5_media_entries(url, webpage, video_id) + if entries: + info = entries[0] + + if not sources and not info: message = self._html_search_regex( r'(?s)<(div|p)[^>]+class="no-video"[^>]*>(?P<value>.+?)</\1', webpage, 'error message', group='value') @@ -80,23 +86,29 @@ 'format_id': format_id, 'height': height, }) - self._sort_formats(formats) + if formats: + info['formats'] = formats + self._sort_formats(info['formats']) description = self._html_search_regex( - r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1', - webpage, 'description', fatal=False, group='value') + (r'(?s)<section[^>]+class=["\']video-description[^>]+>(?P<value>.+?)</section>', + r'<(div|p)[^>]+class="description"[^>]*>(?P<value>[^<]+)</\1'), + webpage, 'description', fatal=False, + group='value') or self._html_search_meta( + 'description', webpage, default=None) or self._og_search_description(webpage) view_count = int_or_none(self._html_search_regex( r'(\d+) views\s*<', webpage, 'view count', fatal=False)) thumbnail = self._search_regex( r"poster'?\s*:\s*([\"'])(?P<url>(?:(?!\1).)+)\1", webpage, - 'thumbnail', fatal=False, group='url') + 'thumbnail', default=None, group='url') like_count = int_or_none(self._search_regex( - (r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes', + (r'(\d+)</span>\s*likes', + r'(\d+)\s*</11[^>]+>(?: |\s)*\blikes', r'class=["\']save-count["\'][^>]*>\s*(\d+)'), webpage, 'like count', fatal=False)) - return { + return merge_dicts(info, { 'id': video_id, 'display_id': display_id, 'title': title, @@ -106,4 +118,4 @@ 'like_count': like_count, 'formats': formats, 'age_limit': 18, - } + }) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/safari.py new/youtube-dl/youtube_dl/extractor/safari.py --- old/youtube-dl/youtube_dl/extractor/safari.py 2020-02-16 16:41:19.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/safari.py 2020-03-01 14:10:40.000000000 +0100 @@ -8,7 +8,6 @@ from ..compat import ( compat_parse_qs, - compat_str, compat_urlparse, ) from ..utils import ( @@ -39,13 +38,13 @@ 'Downloading login page') def is_logged(urlh): - return 'learning.oreilly.com/home/' in compat_str(urlh.geturl()) + return 'learning.oreilly.com/home/' in urlh.geturl() if is_logged(urlh): self.LOGGED_IN = True return - redirect_url = compat_str(urlh.geturl()) + redirect_url = urlh.geturl() parsed_url = compat_urlparse.urlparse(redirect_url) qs = compat_parse_qs(parsed_url.query) next_uri = compat_urlparse.urljoin( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/teachable.py new/youtube-dl/youtube_dl/extractor/teachable.py --- old/youtube-dl/youtube_dl/extractor/teachable.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/teachable.py 2020-03-01 14:10:40.000000000 +0100 @@ -4,7 +4,6 @@ from .common import InfoExtractor from .wistia import WistiaIE -from ..compat import compat_str from ..utils import ( clean_html, ExtractorError, @@ -58,7 +57,7 @@ self._logged_in = True return - login_url = compat_str(urlh.geturl()) + login_url = urlh.geturl() login_form = self._hidden_inputs(login_page) @@ -160,8 +159,8 @@ webpage = self._download_webpage(url, video_id) - wistia_url = WistiaIE._extract_url(webpage) - if not wistia_url: + wistia_urls = WistiaIE._extract_urls(webpage) + if not wistia_urls: if any(re.search(p, webpage) for p in ( r'class=["\']lecture-contents-locked', r'>\s*Lecture contents locked', @@ -174,12 +173,14 @@ title = self._og_search_title(webpage, default=None) - return { + entries = [{ '_type': 'url_transparent', 'url': wistia_url, 'ie_key': WistiaIE.ie_key(), 'title': title, - } + } for wistia_url in wistia_urls] + + return self.playlist_result(entries, video_id, title) class TeachableCourseIE(TeachableBaseIE): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/telecinco.py new/youtube-dl/youtube_dl/extractor/telecinco.py --- old/youtube-dl/youtube_dl/extractor/telecinco.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/telecinco.py 2020-03-01 14:10:40.000000000 +0100 @@ -11,6 +11,7 @@ determine_ext, int_or_none, str_or_none, + try_get, urljoin, ) @@ -24,7 +25,7 @@ 'info_dict': { 'id': '1876350223', 'title': 'Bacalao con kokotxas al pil-pil', - 'description': 'md5:1382dacd32dd4592d478cbdca458e5bb', + 'description': 'md5:716caf5601e25c3c5ab6605b1ae71529', }, 'playlist': [{ 'md5': 'adb28c37238b675dad0f042292f209a7', @@ -56,6 +57,26 @@ 'duration': 50, }, }, { + # video in opening's content + 'url': 'https://www.telecinco.es/vivalavida/fiorella-sobrina-edmundo-arrocet-entrevista_18_2907195140.html', + 'info_dict': { + 'id': '2907195140', + 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', + 'description': 'md5:73f340a7320143d37ab895375b2bf13a', + }, + 'playlist': [{ + 'md5': 'adb28c37238b675dad0f042292f209a7', + 'info_dict': { + 'id': 'TpI2EttSDAReWpJ1o0NVh2', + 'ext': 'mp4', + 'title': 'La surrealista entrevista a la sobrina de Edmundo Arrocet: "No puedes venir aquí y tomarnos por tontos"', + 'duration': 1015, + }, + }], + 'params': { + 'skip_download': True, + }, + }, { 'url': 'http://www.telecinco.es/informativos/nacional/Pablo_Iglesias-Informativos_Telecinco-entrevista-Pedro_Piqueras_2_1945155182.html', 'only_matching': True, }, { @@ -135,17 +156,28 @@ display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) article = self._parse_json(self._search_regex( - r'window\.\$REACTBASE_STATE\.article\s*=\s*({.+})', + r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=\s*({.+})', webpage, 'article'), display_id)['article'] title = article.get('title') - description = clean_html(article.get('leadParagraph')) + description = clean_html(article.get('leadParagraph')) or '' if article.get('editorialType') != 'VID': entries = [] - for p in article.get('body', []): + body = [article.get('opening')] + body.extend(try_get(article, lambda x: x['body'], list) or []) + for p in body: + if not isinstance(p, dict): + continue content = p.get('content') - if p.get('type') != 'video' or not content: + if not content: + continue + type_ = p.get('type') + if type_ == 'paragraph': + content_str = str_or_none(content) + if content_str: + description += content_str continue - entries.append(self._parse_content(content, url)) + if type_ == 'video' and isinstance(content, dict): + entries.append(self._parse_content(content, url)) return self.playlist_result( entries, str_or_none(article.get('id')), title, description) content = article['opening']['content'] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/tumblr.py new/youtube-dl/youtube_dl/extractor/tumblr.py --- old/youtube-dl/youtube_dl/extractor/tumblr.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/tumblr.py 2020-03-01 14:10:40.000000000 +0100 @@ -4,7 +4,6 @@ import re from .common import InfoExtractor -from ..compat import compat_str from ..utils import ( ExtractorError, int_or_none, @@ -151,7 +150,7 @@ url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id) webpage, urlh = self._download_webpage_handle(url, video_id) - redirect_url = compat_str(urlh.geturl()) + redirect_url = urlh.geturl() if 'tumblr.com/safe-mode' in redirect_url or redirect_url.startswith('/safe-mode'): raise ExtractorError( 'This Tumblr may contain sensitive media. ' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/tv2dk.py new/youtube-dl/youtube_dl/extractor/tv2dk.py --- old/youtube-dl/youtube_dl/extractor/tv2dk.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/tv2dk.py 2020-03-01 14:10:40.000000000 +0100 @@ -106,7 +106,7 @@ video_id = self._match_id(url) video = self._download_json( - 'http://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id, + 'https://play.tv2bornholm.dk/controls/AJAX.aspx/specifikVideo', video_id, data=json.dumps({ 'playlist_id': video_id, 'serienavn': '', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/vimeo.py new/youtube-dl/youtube_dl/extractor/vimeo.py --- old/youtube-dl/youtube_dl/extractor/vimeo.py 2020-02-16 16:41:28.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/vimeo.py 2020-03-01 14:10:40.000000000 +0100 @@ -591,7 +591,7 @@ # Retrieve video webpage to extract further information webpage, urlh = self._download_webpage_handle( url, video_id, headers=headers) - redirect_url = compat_str(urlh.geturl()) + redirect_url = urlh.geturl() except ExtractorError as ee: if isinstance(ee.cause, compat_HTTPError) and ee.cause.code == 403: errmsg = ee.cause.read() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/wistia.py new/youtube-dl/youtube_dl/extractor/wistia.py --- old/youtube-dl/youtube_dl/extractor/wistia.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/wistia.py 2020-03-01 14:10:40.000000000 +0100 @@ -45,22 +45,23 @@ # https://wistia.com/support/embed-and-share/video-on-your-website @staticmethod def _extract_url(webpage): - match = re.search( - r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage) - if match: - return unescapeHTML(match.group('url')) + urls = WistiaIE._extract_urls(webpage) + return urls[0] if urls else None - match = re.search( - r'''(?sx) - <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*? - <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2 - ''', webpage) - if match: - return 'wistia:%s' % match.group('id') - - match = re.search(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage) - if match: - return 'wistia:%s' % match.group('id') + @staticmethod + def _extract_urls(webpage): + urls = [] + for match in re.finditer( + r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage): + urls.append(unescapeHTML(match.group('url'))) + for match in re.finditer( + r'''(?sx) + <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]{10})\b.*?\2 + ''', webpage): + urls.append('wistia:%s' % match.group('id')) + for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage): + urls.append('wistia:%s' % match.group('id')) + return urls def _real_extract(self, url): video_id = self._match_id(url) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/xhamster.py new/youtube-dl/youtube_dl/extractor/xhamster.py --- old/youtube-dl/youtube_dl/extractor/xhamster.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/xhamster.py 2020-03-01 14:10:40.000000000 +0100 @@ -113,7 +113,7 @@ display_id = mobj.group('display_id') or mobj.group('display_id_2') desktop_url = re.sub(r'^(https?://(?:.+?\.)?)m\.', r'\1', url) - webpage = self._download_webpage(desktop_url, video_id) + webpage, urlh = self._download_webpage_handle(desktop_url, video_id) error = self._html_search_regex( r'<div[^>]+id=["\']videoClosed["\'][^>]*>(.+?)</div>', @@ -161,6 +161,9 @@ 'ext': determine_ext(format_url, 'mp4'), 'height': get_height(quality), 'filesize': filesize, + 'http_headers': { + 'Referer': urlh.geturl(), + }, }) self._sort_formats(formats) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/xtube.py new/youtube-dl/youtube_dl/extractor/xtube.py --- old/youtube-dl/youtube_dl/extractor/xtube.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/xtube.py 2020-03-01 14:10:40.000000000 +0100 @@ -47,7 +47,7 @@ 'display_id': 'A-Super-Run-Part-1-YT', 'ext': 'flv', 'title': 'A Super Run - Part 1 (YT)', - 'description': 'md5:ca0d47afff4a9b2942e4b41aa970fd93', + 'description': 'md5:4cc3af1aa1b0413289babc88f0d4f616', 'uploader': 'tshirtguy59', 'duration': 579, 'view_count': int, @@ -87,10 +87,24 @@ 'Cookie': 'age_verified=1; cookiesAccepted=1', }) - sources = self._parse_json(self._search_regex( - r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),', - webpage, 'sources', group='sources'), video_id, - transform_source=js_to_json) + title, thumbnail, duration = [None] * 3 + + config = self._parse_json(self._search_regex( + r'playerConf\s*=\s*({.+?})\s*,\s*\n', webpage, 'config', + default='{}'), video_id, transform_source=js_to_json, fatal=False) + if config: + config = config.get('mainRoll') + if isinstance(config, dict): + title = config.get('title') + thumbnail = config.get('poster') + duration = int_or_none(config.get('duration')) + sources = config.get('sources') + + if isinstance(sources, dict): + sources = self._parse_json(self._search_regex( + r'(["\'])?sources\1?\s*:\s*(?P<sources>{.+?}),', + webpage, 'sources', group='sources'), video_id, + transform_source=js_to_json) formats = [] for format_id, format_url in sources.items(): @@ -102,20 +116,25 @@ self._remove_duplicate_formats(formats) self._sort_formats(formats) - title = self._search_regex( - (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'), - webpage, 'title', group='title') - description = self._search_regex( + if not title: + title = self._search_regex( + (r'<h1>\s*(?P<title>[^<]+?)\s*</h1>', r'videoTitle\s*:\s*(["\'])(?P<title>.+?)\1'), + webpage, 'title', group='title') + description = self._og_search_description( + webpage, default=None) or self._html_search_meta( + 'twitter:description', webpage, default=None) or self._search_regex( r'</h1>\s*<p>([^<]+)', webpage, 'description', fatal=False) uploader = self._search_regex( (r'<input[^>]+name="contentOwnerId"[^>]+value="([^"]+)"', r'<span[^>]+class="nickname"[^>]*>([^<]+)'), webpage, 'uploader', fatal=False) - duration = parse_duration(self._search_regex( - r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>', - webpage, 'duration', fatal=False)) + if not duration: + duration = parse_duration(self._search_regex( + r'<dt>Runtime:?</dt>\s*<dd>([^<]+)</dd>', + webpage, 'duration', fatal=False)) view_count = str_to_int(self._search_regex( - r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>', + (r'["\']viewsCount["\'][^>]*>(\d+)\s+views', + r'<dt>Views:?</dt>\s*<dd>([\d,\.]+)</dd>'), webpage, 'view count', fatal=False)) comment_count = str_to_int(self._html_search_regex( r'>Comments? \(([\d,\.]+)\)<', @@ -126,6 +145,7 @@ 'display_id': display_id, 'title': title, 'description': description, + 'thumbnail': thumbnail, 'uploader': uploader, 'duration': duration, 'view_count': view_count, @@ -144,7 +164,7 @@ 'id': 'greenshowers-4056496', 'age_limit': 18, }, - 'playlist_mincount': 155, + 'playlist_mincount': 154, } def _real_extract(self, url): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/youjizz.py new/youtube-dl/youtube_dl/extractor/youjizz.py --- old/youtube-dl/youtube_dl/extractor/youjizz.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/youjizz.py 2020-03-01 14:10:40.000000000 +0100 @@ -44,7 +44,7 @@ encodings = self._parse_json( self._search_regex( - r'encodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings', + r'[Ee]ncodings\s*=\s*(\[.+?\]);\n', webpage, 'encodings', default='[]'), video_id, fatal=False) for encoding in encodings: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/zapiks.py new/youtube-dl/youtube_dl/extractor/zapiks.py --- old/youtube-dl/youtube_dl/extractor/zapiks.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/zapiks.py 2020-03-01 14:10:40.000000000 +0100 @@ -29,7 +29,6 @@ 'timestamp': 1359044972, 'upload_date': '20130124', 'view_count': int, - 'comment_count': int, }, }, { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/zdf.py new/youtube-dl/youtube_dl/extractor/zdf.py --- old/youtube-dl/youtube_dl/extractor/zdf.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/zdf.py 2020-03-01 14:10:40.000000000 +0100 @@ -244,14 +244,14 @@ 'id': 'das-aktuelle-sportstudio', 'title': 'das aktuelle sportstudio | ZDF', }, - 'playlist_count': 21, + 'playlist_mincount': 23, }, { 'url': 'https://www.zdf.de/dokumentation/planet-e', 'info_dict': { 'id': 'planet-e', 'title': 'planet e.', }, - 'playlist_count': 4, + 'playlist_mincount': 50, }, { 'url': 'https://www.zdf.de/filme/taunuskrimi/', 'only_matching': True, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/options.py new/youtube-dl/youtube_dl/options.py --- old/youtube-dl/youtube_dl/options.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/options.py 2020-03-01 14:10:40.000000000 +0100 @@ -134,7 +134,7 @@ action='help', help='Print this help text and exit') general.add_option( - '-v', '--version', + '--version', action='version', help='Print program version and exit') general.add_option( diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/utils.py new/youtube-dl/youtube_dl/utils.py --- old/youtube-dl/youtube_dl/utils.py 2020-02-16 16:41:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/utils.py 2020-03-01 14:10:40.000000000 +0100 @@ -2795,6 +2795,15 @@ https_response = http_response +class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler): + if sys.version_info[0] < 3: + def redirect_request(self, req, fp, code, msg, headers, newurl): + # On python 2 urlh.geturl() may sometimes return redirect URL + # as byte string instead of unicode. This workaround allows + # to force it always return unicode. + return compat_urllib_request.HTTPRedirectHandler.redirect_request(self, req, fp, code, msg, headers, compat_str(newurl)) + + def extract_timezone(date_str): m = re.search( r'^.{8,}?(?P<tz>Z$| ?(?P<sign>\+|-)(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})$)', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/version.py new/youtube-dl/youtube_dl/version.py --- old/youtube-dl/youtube_dl/version.py 2020-02-16 16:43:38.000000000 +0100 +++ new/youtube-dl/youtube_dl/version.py 2020-03-01 14:11:29.000000000 +0100 @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.02.16' +__version__ = '2020.03.01'
