Hello community,

here is the log from the commit of package youtube-dl for openSUSE:Factory checked in at 2019-08-14 11:37:49
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old)
 and      /work/SRC/openSUSE:Factory/.youtube-dl.new.9556 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "youtube-dl"

Wed Aug 14 11:37:49 2019 rev:113 rq:723172 version:2019.08.13

Changes:
--------
--- /work/SRC/openSUSE:Factory/youtube-dl/python-youtube-dl.changes 2019-08-05 10:41:14.907299753 +0200
+++ /work/SRC/openSUSE:Factory/.youtube-dl.new.9556/python-youtube-dl.changes 2019-08-14 11:37:54.500691099 +0200
@@ -1,0 +2,8 @@
+Wed Aug 14 03:57:43 UTC 2019 - Jan Engelhardt <[email protected]>
+
+- Update to new upstream release 2019.08.13
+  * downloader/fragment: Fix ETA calculation of resumed download
+  * discovery: extract series meta field
+  * roosterteeth: add support for watch URLs
+
+-------------------------------------------------------------------

youtube-dl.changes: same change

Old:
----
  youtube-dl-2019.08.02.tar.gz
  youtube-dl-2019.08.02.tar.gz.sig

New:
----
  youtube-dl-2019.08.13.tar.gz
  youtube-dl-2019.08.13.tar.gz.sig

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-youtube-dl.spec ++++++
--- /var/tmp/diff_new_pack.pJGZFe/_old 2019-08-14 11:37:55.540690912 +0200
+++ /var/tmp/diff_new_pack.pJGZFe/_new 2019-08-14 11:37:55.544690912 +0200
@@ -19,9 +19,9 @@
 %define modname youtube-dl
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 Name:           python-youtube-dl
-Version:        2019.08.02
+Version:        2019.08.13
 Release:        0
-Summary:        A python module for downloading from video sites for offline watching
+Summary:        A Python module for downloading from video sites for offline watching
 License:        SUSE-Public-Domain AND CC-BY-SA-3.0
 Group:          Development/Languages/Python
 URL:            http://rg3.github.io/youtube-dl/

++++++ youtube-dl.spec ++++++
--- /var/tmp/diff_new_pack.pJGZFe/_old 2019-08-14 11:37:55.560690909 +0200
+++ /var/tmp/diff_new_pack.pJGZFe/_new 2019-08-14 11:37:55.560690909 +0200
@@ -17,7 +17,7 @@
 Name:           youtube-dl
-Version:        2019.08.02
+Version:        2019.08.13
 Release:        0
 Summary:        A tool for downloading from video sites for offline watching
 License:        SUSE-Public-Domain AND CC-BY-SA-3.0

++++++ youtube-dl-2019.08.02.tar.gz -> youtube-dl-2019.08.13.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/ChangeLog new/youtube-dl/ChangeLog
--- old/youtube-dl/ChangeLog 2019-08-02 00:37:46.000000000 +0200
+++ new/youtube-dl/ChangeLog 2019-08-13 18:18:35.000000000 +0200
@@ -1,3 +1,19 @@
+version 2019.08.13
+
+Core
+* [downloader/fragment] Fix ETA calculation of resumed download (#21992)
+* [YoutubeDL] Check annotations availability (#18582)
+
+Extractors
+* [youtube:playlist] Improve flat extraction (#21927)
+* [youtube] Fix annotations extraction (#22045)
++ [discovery] Extract series meta field (#21808)
+* [youtube] Improve error detection (#16445)
+* [vimeo] Fix album extraction (#1933, #15704, #15855, #18967, #21986)
++ [roosterteeth] Add support for watch URLs
+* [discovery] Limit video data by show slug (#21980)
+
+
 version 2019.08.02
 
 Extractors
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/README.md new/youtube-dl/README.md
--- old/youtube-dl/README.md 2019-08-02 00:37:51.000000000 +0200
+++ new/youtube-dl/README.md 2019-08-13 18:18:37.000000000 +0200
@@ -1216,6 +1216,72 @@
     'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
 ```
+### Inline values
+
+Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.
+
+#### Example
+
+Correct:
+
+```python
+title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+```
+
+Incorrect:
+
+```python
+TITLE_RE = r'<title>([^<]+)</title>'
+# ...some lines of code...
+title = self._html_search_regex(TITLE_RE, webpage, 'title')
+```
+
+### Collapse fallbacks
+
+Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.
+
+#### Example
+
+Good:
+
+```python
+description = self._html_search_meta(
+    ['og:description', 'description', 'twitter:description'],
+    webpage, 'description', default=None)
+```
+
+Unwieldy:
+
+```python
+description = (
+    self._og_search_description(webpage, default=None)
+    or self._html_search_meta('description', webpage, default=None)
+    or self._html_search_meta('twitter:description', webpage, default=None))
+```
+
+Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.
+
+### Trailing parentheses
+
+Always move trailing parentheses after the last argument.
+
+#### Example
+
+Correct:
+
+```python
+    lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+    list)
+```
+
+Incorrect:
+
+```python
+    lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+    list,
+)
+```
+
 ### Use convenience conversion and parsing functions
 
 Wrap all extracted numeric data into safe functions from [`youtube_dl/utils.py`](https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/utils.py): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/README.txt new/youtube-dl/README.txt
--- old/youtube-dl/README.txt 2019-08-02 00:39:47.000000000 +0200
+++ new/youtube-dl/README.txt 2019-08-13 18:19:20.000000000 +0200
@@ -1648,6 +1648,65 @@
     'https://www.youtube.com/watch?v=FqZTN594JQw&list='
     'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
 
+Inline values
+
+Extracting variables is acceptable for reducing code duplication and
+improving readability of complex expressions. However, you should avoid
+extracting variables used only once and moving them to opposite parts of
+the extractor file, which makes reading the linear flow difficult.
+
+Example
+
+Correct:
+
+    title = self._html_search_regex(r'<title>([^<]+)</title>', webpage, 'title')
+
+Incorrect:
+
+    TITLE_RE = r'<title>([^<]+)</title>'
+    # ...some lines of code...
+    title = self._html_search_regex(TITLE_RE, webpage, 'title')
+
+Collapse fallbacks
+
+Multiple fallback values can quickly become unwieldy. Collapse multiple
+fallback values into a single expression via a list of patterns.
+
+Example
+
+Good:
+
+    description = self._html_search_meta(
+        ['og:description', 'description', 'twitter:description'],
+        webpage, 'description', default=None)
+
+Unwieldy:
+
+    description = (
+        self._og_search_description(webpage, default=None)
+        or self._html_search_meta('description', webpage, default=None)
+        or self._html_search_meta('twitter:description', webpage, default=None))
+
+Methods supporting list of patterns are: _search_regex,
+_html_search_regex, _og_search_property, _html_search_meta.
+
+Trailing parentheses
+
+Always move trailing parentheses after the last argument.
+
+Example
+
+Correct:
+
+        lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+        list)
+
+Incorrect:
+
+        lambda x: x['ResultSet']['Result'][0]['VideoUrlSet']['VideoUrl'],
+        list,
+    )
+
 Use convenience conversion and parsing functions
 
 Wrap all extracted numeric data into safe functions from
Binary files old/youtube-dl/youtube-dl and new/youtube-dl/youtube-dl differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube-dl.1 new/youtube-dl/youtube-dl.1
--- old/youtube-dl/youtube-dl.1 2019-08-02 00:40:01.000000000 +0200
+++ new/youtube-dl/youtube-dl.1 2019-08-13 18:19:22.000000000 +0200
@@ -2423,6 +2423,86 @@
 \[aq]PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4\[aq]
 \f[]
 .fi
+.SS Inline values
+.PP
+Extracting variables is acceptable for reducing code duplication and
+improving readability of complex expressions.
+However, you should avoid extracting variables used only once and moving
+them to opposite parts of the extractor file, which makes reading the
+linear flow difficult.
+.SS Example
+.PP
+Correct:
+.IP
+.nf
+\f[C]
+title\ =\ self._html_search_regex(r\[aq]<title>([^<]+)</title>\[aq],\ webpage,\ \[aq]title\[aq])
+\f[]
+.fi
+.PP
+Incorrect:
+.IP
+.nf
+\f[C]
+TITLE_RE\ =\ r\[aq]<title>([^<]+)</title>\[aq]
+#\ ...some\ lines\ of\ code...
+title\ =\ self._html_search_regex(TITLE_RE,\ webpage,\ \[aq]title\[aq])
+\f[]
+.fi
+.SS Collapse fallbacks
+.PP
+Multiple fallback values can quickly become unwieldy.
+Collapse multiple fallback values into a single expression via a list of
+patterns.
+.SS Example
+.PP
+Good:
+.IP
+.nf
+\f[C]
+description\ =\ self._html_search_meta(
+\ \ \ \ [\[aq]og:description\[aq],\ \[aq]description\[aq],\ \[aq]twitter:description\[aq]],
+\ \ \ \ webpage,\ \[aq]description\[aq],\ default=None)
+\f[]
+.fi
+.PP
+Unwieldy:
+.IP
+.nf
+\f[C]
+description\ =\ (
+\ \ \ \ self._og_search_description(webpage,\ default=None)
+\ \ \ \ or\ self._html_search_meta(\[aq]description\[aq],\ webpage,\ default=None)
+\ \ \ \ or\ self._html_search_meta(\[aq]twitter:description\[aq],\ webpage,\ default=None))
+\f[]
+.fi
+.PP
+Methods supporting list of patterns are: \f[C]_search_regex\f[],
+\f[C]_html_search_regex\f[], \f[C]_og_search_property\f[],
+\f[C]_html_search_meta\f[].
+.SS Trailing parentheses
+.PP
+Always move trailing parentheses after the last argument.
+.SS Example
+.PP
+Correct:
+.IP
+.nf
+\f[C]
+\ \ \ \ lambda\ x:\ x[\[aq]ResultSet\[aq]][\[aq]Result\[aq]][0][\[aq]VideoUrlSet\[aq]][\[aq]VideoUrl\[aq]],
+\ \ \ \ list)
+\f[]
+.fi
+.PP
+Incorrect:
+.IP
+.nf
+\f[C]
+\ \ \ \ lambda\ x:\ x[\[aq]ResultSet\[aq]][\[aq]Result\[aq]][0][\[aq]VideoUrlSet\[aq]][\[aq]VideoUrl\[aq]],
+\ \ \ \ list,
+)
+\f[]
+.fi
 .SS Use convenience conversion and parsing functions
 .PP
 Wrap all extracted numeric data into safe functions from
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/YoutubeDL.py new/youtube-dl/youtube_dl/YoutubeDL.py
--- old/youtube-dl/youtube_dl/YoutubeDL.py 2019-08-02 00:37:17.000000000 +0200
+++ new/youtube-dl/youtube_dl/YoutubeDL.py 2019-08-13 18:18:19.000000000 +0200
@@ -1783,6 +1783,8 @@
             annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
                 self.to_screen('[info] Video annotations are already present')
+            elif not info_dict.get('annotations'):
+                self.report_warning('There are no annotations to write.')
             else:
                 try:
                     self.to_screen('[info] Writing video annotations to: ' + annofn)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/downloader/fragment.py new/youtube-dl/youtube_dl/downloader/fragment.py
--- old/youtube-dl/youtube_dl/downloader/fragment.py 2019-08-02 00:37:17.000000000 +0200
+++ new/youtube-dl/youtube_dl/downloader/fragment.py 2019-08-13 18:18:19.000000000 +0200
@@ -190,12 +190,13 @@
         })
 
     def _start_frag_download(self, ctx):
+        resume_len = ctx['complete_frags_downloaded_bytes']
        total_frags = ctx['total_frags']
         # This dict stores the download progress, it's updated by the progress
         # hook
         state = {
             'status': 'downloading',
-            'downloaded_bytes': ctx['complete_frags_downloaded_bytes'],
+            'downloaded_bytes': resume_len,
             'fragment_index': ctx['fragment_index'],
             'fragment_count': total_frags,
             'filename': ctx['filename'],
@@ -234,8 +235,8 @@
                 state['downloaded_bytes'] += frag_downloaded_bytes - ctx['prev_frag_downloaded_bytes']
                 if not ctx['live']:
                     state['eta'] = self.calc_eta(
-                        start, time_now, estimated_size,
-                        state['downloaded_bytes'])
+                        start, time_now, estimated_size - resume_len,
+                        state['downloaded_bytes'] - resume_len)
                 state['speed'] = s.get('speed') or ctx.get('speed')
                 ctx['speed'] = state['speed']
                 ctx['prev_frag_downloaded_bytes'] = frag_downloaded_bytes
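
The fragment downloader fix above subtracts the bytes already on disk from both totals before computing the ETA. A minimal sketch of why that matters, assuming a calc_eta helper of the shape used by the downloader (elapsed time scaled by remaining bytes; the helper below is a simplified stand-in, not the upstream implementation):

    # Simplified stand-in for the downloader's calc_eta helper.
    def calc_eta(start, now, total_bytes, downloaded_bytes):
        elapsed = now - start
        if downloaded_bytes == 0 or elapsed < 0.001:
            return None
        rate = float(downloaded_bytes) / elapsed
        return int((total_bytes - downloaded_bytes) / rate)

    # A 100 MiB download resumed at 40 MiB, with 10 MiB fetched in the
    # 10 seconds since the resume (a real speed of 1 MiB/s).
    MIB = 1024 * 1024
    start, now = 0.0, 10.0
    resume_len = 40 * MIB
    estimated_size = 100 * MIB
    downloaded = resume_len + 10 * MIB

    # Old behaviour: the 40 MiB from the earlier session inflate the
    # apparent speed to 5 MiB/s, so the remaining 50 MiB look like 10 s.
    print(calc_eta(start, now, estimated_size, downloaded))              # 10

    # New behaviour: shifting both totals by resume_len measures only this
    # session, giving the correct 50 s for 50 MiB at 1 MiB/s.
    print(calc_eta(start, now, estimated_size - resume_len,
                   downloaded - resume_len))                             # 50
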
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/discovery.py new/youtube-dl/youtube_dl/extractor/discovery.py
--- old/youtube-dl/youtube_dl/extractor/discovery.py 2019-08-02 00:37:30.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/discovery.py 2019-08-13 18:18:20.000000000 +0200
@@ -34,7 +34,7 @@
                         cookingchanneltv|
                         motortrend
                     )
-                )\.com/tv-shows/[^/]+/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
+                )\.com/tv-shows/(?P<show_slug>[^/]+)/(?:video|full-episode)s/(?P<id>[^./?#]+)'''
     _TESTS = [{
         'url': 'https://go.discovery.com/tv-shows/cash-cab/videos/riding-with-matthew-perry',
         'info_dict': {
@@ -53,13 +53,17 @@
     }, {
         'url': 'https://go.discovery.com/tv-shows/alaskan-bush-people/videos/follow-your-own-road',
         'only_matching': True,
+    }, {
+        # using `show_slug` is important to get the correct video data
+        'url': 'https://www.sciencechannel.com/tv-shows/mythbusters-on-science/full-episodes/christmas-special',
+        'only_matching': True,
     }]
     _GEO_COUNTRIES = ['US']
     _GEO_BYPASS = False
     _API_BASE_URL = 'https://api.discovery.com/v1/'
 
     def _real_extract(self, url):
-        site, display_id = re.match(self._VALID_URL, url).groups()
+        site, show_slug, display_id = re.match(self._VALID_URL, url).groups()
 
         access_token = None
         cookies = self._get_cookies(url)
@@ -90,7 +94,10 @@
             self._API_BASE_URL + 'content/videos',
             display_id, 'Downloading content JSON metadata',
             headers=headers, query={
+                'embed': 'show.name',
+                'fields': 'authenticated,description.detailed,duration,episodeNumber,id,name,parental.rating,season.number,show,tags',
                 'slug': display_id,
+                'show_slug': show_slug,
             })[0]
         video_id = video['id']
         stream = self._download_json(
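
As the comment in the new test case notes, passing show_slug matters because a bare video slug is not unique across shows on the Discovery API. A rough illustration of the request the extractor now makes, with the endpoint and field list taken from the diff above (build_video_query is a hypothetical helper for this sketch, not part of youtube-dl):

    try:
        from urllib.parse import urlencode  # Python 3
    except ImportError:
        from urllib import urlencode  # Python 2

    API_BASE_URL = 'https://api.discovery.com/v1/'

    def build_video_query(show_slug, display_id):
        return {
            'embed': 'show.name',
            'fields': 'authenticated,description.detailed,duration,episodeNumber,'
                      'id,name,parental.rating,season.number,show,tags',
            'slug': display_id,       # identifies the video...
            'show_slug': show_slug,   # ...scoped to the show from the URL
        }

    query = build_video_query('mythbusters-on-science', 'christmas-special')
    print(API_BASE_URL + 'content/videos?' + urlencode(query))
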
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/roosterteeth.py new/youtube-dl/youtube_dl/extractor/roosterteeth.py
--- old/youtube-dl/youtube_dl/extractor/roosterteeth.py 2019-08-02 00:37:20.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/roosterteeth.py 2019-08-13 18:18:21.000000000 +0200
@@ -17,7 +17,7 @@
 
 class RoosterTeethIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/episode/(?P<id>[^/?#&]+)'
+    _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:episode|watch)/(?P<id>[^/?#&]+)'
     _LOGIN_URL = 'https://roosterteeth.com/login'
     _NETRC_MACHINE = 'roosterteeth'
     _TESTS = [{
@@ -49,6 +49,9 @@
         # only available for FIRST members
         'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
         'only_matching': True,
+    }, {
+        'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
+        'only_matching': True,
     }]
 
     def _login(self):
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/vimeo.py new/youtube-dl/youtube_dl/extractor/vimeo.py
--- old/youtube-dl/youtube_dl/extractor/vimeo.py 2019-08-02 00:37:21.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/vimeo.py 2019-08-13 18:18:22.000000000 +0200
@@ -2,12 +2,14 @@
 from __future__ import unicode_literals
 
 import base64
+import functools
 import json
 import re
 import itertools
 
 from .common import InfoExtractor
 from ..compat import (
+    compat_kwargs,
     compat_HTTPError,
     compat_str,
     compat_urlparse,
@@ -19,6 +21,7 @@
     int_or_none,
     merge_dicts,
     NO_DEFAULT,
+    OnDemandPagedList,
     parse_filesize,
     qualities,
     RegexNotFoundError,
@@ -98,6 +101,13 @@
             webpage, 'vuid', group='vuid')
         return xsrft, vuid
 
+    def _extract_vimeo_config(self, webpage, video_id, *args, **kwargs):
+        vimeo_config = self._search_regex(
+            r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));',
+            webpage, 'vimeo config', *args, **compat_kwargs(kwargs))
+        if vimeo_config:
+            return self._parse_json(vimeo_config, video_id)
+
     def _set_vimeo_cookie(self, name, value):
         self._set_cookie('vimeo.com', name, value)
 
@@ -253,7 +263,7 @@
                             \.
                         )?
                         vimeo(?P<pro>pro)?\.com/
-                        (?!(?:channels|album)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
+                        (?!(?:channels|album|showcase)/[^/?#]+/?(?:$|[?#])|[^/]+/review/|ondemand/)
                         (?:.*?/)?
                         (?:
                             (?:
@@ -580,11 +590,9 @@
         # and latter we extract those that are Vimeo specific.
         self.report_extraction(video_id)
 
-        vimeo_config = self._search_regex(
-            r'vimeo\.config\s*=\s*(?:({.+?})|_extend\([^,]+,\s+({.+?})\));', webpage,
-            'vimeo config', default=None)
+        vimeo_config = self._extract_vimeo_config(webpage, video_id, default=None)
         if vimeo_config:
-            seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {})
+            seed_status = vimeo_config.get('seed_status', {})
             if seed_status.get('state') == 'failed':
                 raise ExtractorError(
                     '%s said: %s' % (self.IE_NAME, seed_status['title']),
@@ -905,7 +913,7 @@
 
 class VimeoAlbumIE(VimeoChannelIE):
     IE_NAME = 'vimeo:album'
-    _VALID_URL = r'https://vimeo\.com/album/(?P<id>\d+)(?:$|[?#]|/(?!video))'
+    _VALID_URL = r'https://vimeo\.com/(?:album|showcase)/(?P<id>\d+)(?:$|[?#]|/(?!video))'
     _TITLE_RE = r'<header id="page_header">\n\s*<h1>(.*?)</h1>'
     _TESTS = [{
         'url': 'https://vimeo.com/album/2632481',
@@ -925,21 +933,39 @@
         'params': {
             'videopassword': 'youtube-dl',
         }
-    }, {
-        'url': 'https://vimeo.com/album/2632481/sort:plays/format:thumbnail',
-        'only_matching': True,
-    }, {
-        # TODO: respect page number
-        'url': 'https://vimeo.com/album/2632481/page:2/sort:plays/format:thumbnail',
-        'only_matching': True,
     }]
+    _PAGE_SIZE = 100
 
-    def _page_url(self, base_url, pagenum):
-        return '%s/page:%d/' % (base_url, pagenum)
+    def _fetch_page(self, album_id, authorizaion, hashed_pass, page):
+        api_page = page + 1
+        query = {
+            'fields': 'link',
+            'page': api_page,
+            'per_page': self._PAGE_SIZE,
+        }
+        if hashed_pass:
+            query['_hashed_pass'] = hashed_pass
+        videos = self._download_json(
+            'https://api.vimeo.com/albums/%s/videos' % album_id,
+            album_id, 'Downloading page %d' % api_page, query=query, headers={
+                'Authorization': 'jwt ' + authorizaion,
+            })['data']
+        for video in videos:
+            link = video.get('link')
+            if not link:
+                continue
+            yield self.url_result(link, VimeoIE.ie_key(), VimeoIE._match_id(link))
 
     def _real_extract(self, url):
         album_id = self._match_id(url)
-        return self._extract_videos(album_id, 'https://vimeo.com/album/%s' % album_id)
+        webpage = self._download_webpage(url, album_id)
+        webpage = self._login_list_password(url, album_id, webpage)
+        api_config = self._extract_vimeo_config(webpage, album_id)['api']
+        entries = OnDemandPagedList(functools.partial(
+            self._fetch_page, album_id, api_config['jwt'],
+            api_config.get('hashed_pass')), self._PAGE_SIZE)
+        return self.playlist_result(entries, album_id, self._html_search_regex(
+            r'<title>\s*(.+?)(?:\s+on Vimeo)?</title>', webpage, 'title', fatal=False))
 
 
 class VimeoGroupsIE(VimeoAlbumIE):
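
The album rewrite above drops the page:N URL scraping in favour of the JWT-authenticated API plus OnDemandPagedList, which fetches pages lazily. A self-contained sketch of that pattern, with a toy page function standing in for the albums/<id>/videos API call (assumes only that the new youtube_dl is importable):

    import functools

    from youtube_dl.utils import OnDemandPagedList

    PAGE_SIZE = 100

    def fetch_page(album_id, page):
        # Stand-in for the API request; `page` is zero-based here while the
        # API's is one-based, hence the +1 in the extractor as well.
        print('fetching page %d of album %s' % (page + 1, album_id))
        for i in range(PAGE_SIZE):
            yield 'https://vimeo.com/%d' % (page * PAGE_SIZE + i)

    entries = OnDemandPagedList(
        functools.partial(fetch_page, '2632481'), PAGE_SIZE)

    # Only the first page is ever requested here, no matter how large the
    # album is; the remaining pages stay undownloaded until touched.
    print(entries.getslice(0, 3))
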
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/youtube.py new/youtube-dl/youtube_dl/extractor/youtube.py
--- old/youtube-dl/youtube_dl/extractor/youtube.py 2019-08-02 00:37:30.000000000 +0200
+++ new/youtube-dl/youtube_dl/extractor/youtube.py 2019-08-13 18:18:22.000000000 +0200
@@ -31,6 +31,7 @@
     clean_html,
     dict_get,
     error_to_compat_str,
+    extract_attributes,
     ExtractorError,
     float_or_none,
     get_element_by_attribute,
@@ -324,17 +325,18 @@
             for video_id, video_title in self.extract_videos_from_page(content):
                 yield self.url_result(video_id, 'Youtube', video_id, video_title)
 
-    def extract_videos_from_page(self, page):
-        ids_in_page = []
-        titles_in_page = []
-        for mobj in re.finditer(self._VIDEO_RE, page):
+    def extract_videos_from_page_impl(self, video_re, page, ids_in_page, titles_in_page):
+        for mobj in re.finditer(video_re, page):
             # The link with index 0 is not the first video of the playlist (not sure if still actual)
             if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                 continue
             video_id = mobj.group('id')
-            video_title = unescapeHTML(mobj.group('title'))
+            video_title = unescapeHTML(
+                mobj.group('title')) if 'title' in mobj.groupdict() else None
             if video_title:
                 video_title = video_title.strip()
+            if video_title == '► Play all':
+                video_title = None
             try:
                 idx = ids_in_page.index(video_id)
                 if video_title and not titles_in_page[idx]:
@@ -342,6 +344,12 @@
             except ValueError:
                 ids_in_page.append(video_id)
                 titles_in_page.append(video_title)
+
+    def extract_videos_from_page(self, page):
+        ids_in_page = []
+        titles_in_page = []
+        self.extract_videos_from_page_impl(
+            self._VIDEO_RE, page, ids_in_page, titles_in_page)
         return zip(ids_in_page, titles_in_page)
 
@@ -1595,17 +1603,6 @@
         video_id = mobj.group(2)
         return video_id
 
-    def _extract_annotations(self, video_id):
-        return self._download_webpage(
-            'https://www.youtube.com/annotations_invideo', video_id,
-            note='Downloading annotations',
-            errnote='Unable to download video annotations', fatal=False,
-            query={
-                'features': 1,
-                'legacy': 1,
-                'video_id': video_id,
-            })
-
     @staticmethod
     def _extract_chapters(description, duration):
         if not description:
@@ -1813,7 +1810,8 @@
 
         def extract_unavailable_message():
             return self._html_search_regex(
-                r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
+                (r'(?s)<div[^>]+id=["\']unavailable-submessage["\'][^>]+>(.+?)</div',
+                 r'(?s)<h1[^>]+id=["\']unavailable-message["\'][^>]*>(.+?)</h1>'),
                 video_webpage, 'unavailable message', default=None)
 
         if not video_info:
@@ -2098,9 +2096,14 @@
                         a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
                     formats.append(a_format)
             else:
-                error_message = clean_html(video_info.get('reason', [None])[0])
+                error_message = extract_unavailable_message()
+                if not error_message:
+                    error_message = clean_html(try_get(
+                        player_response, lambda x: x['playabilityStatus']['reason'],
+                        compat_str))
                 if not error_message:
-                    error_message = extract_unavailable_message()
+                    error_message = clean_html(
+                        try_get(video_info, lambda x: x['reason'][0], compat_str))
                 if error_message:
                     raise ExtractorError(error_message, expected=True)
                 raise ExtractorError('no conn, hlsvp, hlsManifestUrl or url_encoded_fmt_stream_map information found in video info')
@@ -2271,7 +2274,21 @@
         # annotations
         video_annotations = None
         if self._downloader.params.get('writeannotations', False):
-            video_annotations = self._extract_annotations(video_id)
+            xsrf_token = self._search_regex(
+                r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
+                video_webpage, 'xsrf token', group='xsrf_token', fatal=False)
+            invideo_url = try_get(
+                player_response, lambda x: x['annotations'][0]['playerAnnotationsUrlsRenderer']['invideoUrl'], compat_str)
+            if xsrf_token and invideo_url:
+                xsrf_field_name = self._search_regex(
+                    r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<xsrf_field_name>\w+)\2',
+                    video_webpage, 'xsrf field name',
+                    group='xsrf_field_name', default='session_token')
+                video_annotations = self._download_webpage(
+                    self._proto_relative_url(invideo_url),
+                    video_id, note='Downloading annotations',
+                    errnote='Unable to download video annotations', fatal=False,
+                    data=urlencode_postdata({xsrf_field_name: xsrf_token}))
 
         chapters = self._extract_chapters(description_original, video_duration)
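
For readers wondering why the annotations change is so much larger than the method it removes: the old annotations_invideo endpoint no longer works, so the extractor now pulls the XSRF token out of the watch page and POSTs it to the per-video invideoUrl from the player response. A rough standalone equivalent, hedged accordingly (the regexes come from the diff above; requests is used only to keep the sketch short, the extractor itself goes through youtube-dl's own download helpers):

    import re

    import requests  # third-party, for illustration only

    def fetch_annotations(video_webpage, invideo_url):
        token = re.search(
            r'([\'"])XSRF_TOKEN\1\s*:\s*([\'"])(?P<xsrf_token>[A-Za-z0-9+/=]+)\2',
            video_webpage)
        if not token:
            return None
        field = re.search(
            r'([\'"])XSRF_FIELD_NAME\1\s*:\s*([\'"])(?P<name>\w+)\2',
            video_webpage)
        field_name = field.group('name') if field else 'session_token'
        if invideo_url.startswith('//'):  # invideoUrl may be protocol-relative
            invideo_url = 'https:' + invideo_url
        resp = requests.post(invideo_url,
                             data={field_name: token.group('xsrf_token')})
        return resp.text if resp.ok else None
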
@@ -2429,7 +2446,8 @@
                         (%(playlist_id)s)
                      )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
     _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s'
-    _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
+    _VIDEO_RE_TPL = r'href="\s*/watch\?v=%s(?:&(?:[^"]*?index=(?P<index>\d+))?(?:[^>]+>(?P<title>[^<]+))?)?'
+    _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
     IE_NAME = 'youtube:playlist'
     _TESTS = [{
         'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
@@ -2594,6 +2612,34 @@
     def _real_initialize(self):
         self._login()
 
+    def extract_videos_from_page(self, page):
+        ids_in_page = []
+        titles_in_page = []
+
+        for item in re.findall(
+                r'(<[^>]*\bdata-video-id\s*=\s*["\'][0-9A-Za-z_-]{11}[^>]+>)', page):
+            attrs = extract_attributes(item)
+            video_id = attrs['data-video-id']
+            video_title = unescapeHTML(attrs.get('data-title'))
+            if video_title:
+                video_title = video_title.strip()
+            ids_in_page.append(video_id)
+            titles_in_page.append(video_title)
+
+        # Fallback with old _VIDEO_RE
+        self.extract_videos_from_page_impl(
+            self._VIDEO_RE, page, ids_in_page, titles_in_page)
+
+        # Relaxed fallbacks
+        self.extract_videos_from_page_impl(
+            r'href="\s*/watch\?v\s*=\s*(?P<id>[0-9A-Za-z_-]{11})', page,
+            ids_in_page, titles_in_page)
+        self.extract_videos_from_page_impl(
+            r'data-video-ids\s*=\s*["\'](?P<id>[0-9A-Za-z_-]{11})', page,
+            ids_in_page, titles_in_page)
+
+        return zip(ids_in_page, titles_in_page)
+
     def _extract_mix(self, playlist_id):
         # The mixes are generated from a single video
         # the id of the playlist is just 'RD' + video_id
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/version.py new/youtube-dl/youtube_dl/version.py
--- old/youtube-dl/youtube_dl/version.py 2019-08-02 00:37:46.000000000 +0200
+++ new/youtube-dl/youtube_dl/version.py 2019-08-13 18:18:35.000000000 +0200
@@ -1,3 +1,3 @@
 from __future__ import unicode_literals
 
-__version__ = '2019.08.02'
+__version__ = '2019.08.13'
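
The version.py bump is what the packaged module reports at runtime, so the update can be sanity-checked from Python once the new package is installed (a trivial check, assuming youtube_dl is on the import path):

    import youtube_dl

    # Should print 2019.08.13 after this submission is installed.
    print(youtube_dl.__version__)
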
