Hello community, here is the log from the commit of package youtube-dl for openSUSE:Factory checked in at 2020-12-02 13:58:58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old) and /work/SRC/openSUSE:Factory/.youtube-dl.new.5913 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "youtube-dl" Wed Dec 2 13:58:58 2020 rev:147 rq:852436 version:2020.12.02 Changes: -------- --- /work/SRC/openSUSE:Factory/youtube-dl/python-youtube-dl.changes 2020-11-29 12:34:06.290346664 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.5913/python-youtube-dl.changes 2020-12-02 13:58:59.845850204 +0100 @@ -1,0 +2,9 @@ +Tue Dec 1 19:49:34 UTC 2020 - Jan Engelhardt <[email protected]> + +- Update to release 2020.12.02 + * yandexmusic: Refactor and add support for artist's tracks + and albums + * mediaset: add support for movie URLs + * youtube: Improve age-gated videos extraction + +------------------------------------------------------------------- youtube-dl.changes: same change Old: ---- youtube-dl-2020.11.29.tar.gz youtube-dl-2020.11.29.tar.gz.sig New: ---- youtube-dl-2020.12.02.tar.gz youtube-dl-2020.12.02.tar.gz.sig ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.zaIRGB/_old 2020-12-02 13:59:00.941851359 +0100 +++ /var/tmp/diff_new_pack.zaIRGB/_new 2020-12-02 13:59:00.945851363 +0100 @@ -19,7 +19,7 @@ %define modname youtube-dl %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-youtube-dl -Version: 2020.11.29 +Version: 2020.12.02 Release: 0 Summary: A Python module for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.zaIRGB/_old 2020-12-02 13:59:00.961851380 +0100 +++ /var/tmp/diff_new_pack.zaIRGB/_new 2020-12-02 13:59:00.965851384 +0100 @@ -17,7 +17,7 @@ Name: youtube-dl -Version: 2020.11.29 +Version: 2020.12.02 Release: 0 Summary: A tool for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl-2020.11.29.tar.gz -> youtube-dl-2020.12.02.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/ChangeLog new/youtube-dl/ChangeLog --- old/youtube-dl/ChangeLog 2020-11-29 07:52:58.000000000 +0100 +++ new/youtube-dl/ChangeLog 2020-12-01 19:37:37.000000000 +0100 @@ -1,3 +1,20 @@ +version 2020.12.02 + +Extractors ++ [tva] Add support for qub.ca (#27235) ++ [toggle] Detect DRM protected videos (closes #16479)(closes #20805) ++ [toggle] Add support for new MeWatch URLs (#27256) +* [youtube:tab] Extract channels only from channels tab (#27266) ++ [cspan] Extract info from jwplayer data (#3672, #3734, #10638, #13030, + #18806, #23148, #24461, #26171, #26800, #27263) +* [cspan] Pass Referer header with format's video URL (#26032, #25729) +* [youtube] Improve age-gated videos extraction (#27259) ++ [mediaset] Add support for movie URLs (#27240) +* [yandexmusic] Refactor ++ [yandexmusic] Add support for artist's tracks and albums (#11887, #22284) +* [yandexmusic:track] Fix extraction (#26449, #26669, #26747, #26748, #26762) + + version 2020.11.29 Core diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/docs/supportedsites.md new/youtube-dl/docs/supportedsites.md --- old/youtube-dl/docs/supportedsites.md 2020-11-29 07:53:01.000000000 +0100 +++ new/youtube-dl/docs/supportedsites.md 2020-12-01 19:37:40.000000000 +0100 @@ -486,6 +486,7 @@ - **META** - **metacafe** - **Metacritic** + - **mewatch** - **Mgoon** - **MGTV**: 芒果TV - **MiaoPai** @@ -1132,6 +1133,8 @@ - **yahoo:japannews**: Yahoo! Japan News - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом + - **yandexmusic:artist:albums**: Яндекс.Музыка - Артист - Альбомы + - **yandexmusic:artist:tracks**: Яндекс.Музыка - Артист - Треки - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:track**: Яндекс.Музыка - Трек - **YandexVideo** Binary files old/youtube-dl/youtube-dl and new/youtube-dl/youtube-dl differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/cspan.py new/youtube-dl/youtube_dl/extractor/cspan.py --- old/youtube-dl/youtube_dl/extractor/cspan.py 2020-11-29 07:51:54.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/cspan.py 2020-11-29 07:55:20.000000000 +0100 @@ -10,6 +10,8 @@ find_xpath_attr, get_element_by_class, int_or_none, + js_to_json, + merge_dicts, smuggle_url, unescapeHTML, ) @@ -98,6 +100,26 @@ bc_attr['data-bcid']) return self.url_result(smuggle_url(bc_url, {'source_url': url})) + def add_referer(formats): + for f in formats: + f.setdefault('http_headers', {})['Referer'] = url + + # As of 01.12.2020 this path looks to cover all cases making the rest + # of the code unnecessary + jwsetup = self._parse_json( + self._search_regex( + r'(?s)jwsetup\s*=\s*({.+?})\s*;', webpage, 'jwsetup', + default='{}'), + video_id, transform_source=js_to_json, fatal=False) + if jwsetup: + info = self._parse_jwplayer_data( + jwsetup, video_id, require_title=False, m3u8_id='hls', + base_url=url) + add_referer(info['formats']) + ld_info = self._search_json_ld(webpage, video_id, default={}) + return merge_dicts(info, ld_info) + + # Obsolete # We first look for clipid, because clipprog always appears before patterns = [r'id=\'clip(%s)\'\s*value=\'([0-9]+)\'' % t for t in ('id', 'prog')] results = list(filter(None, (re.search(p, webpage) for p in patterns))) @@ -165,6 +187,7 @@ formats = self._extract_m3u8_formats( path, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') if determine_ext(path) == 'm3u8' else [{'url': path, }] + add_referer(formats) self._sort_formats(formats) entries.append({ 'id': '%s_%d' % (video_id, partnum + 1), diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/extractors.py new/youtube-dl/youtube_dl/extractor/extractors.py --- old/youtube-dl/youtube_dl/extractor/extractors.py 2020-11-29 07:51:54.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/extractors.py 2020-11-29 07:55:20.000000000 +0100 @@ -1185,7 +1185,10 @@ EMPFlixIE, MovieFapIE, ) -from .toggle import ToggleIE +from .toggle import ( + ToggleIE, + MeWatchIE, +) from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE from .toutv import TouTvIE @@ -1478,6 +1481,8 @@ YandexMusicTrackIE, YandexMusicAlbumIE, YandexMusicPlaylistIE, + YandexMusicArtistTracksIE, + YandexMusicArtistAlbumsIE, ) from .yandexvideo import YandexVideoIE from .yapfiles import YapFilesIE diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/mediaset.py new/youtube-dl/youtube_dl/extractor/mediaset.py --- old/youtube-dl/youtube_dl/extractor/mediaset.py 2020-11-29 07:51:55.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/mediaset.py 2020-11-29 07:55:20.000000000 +0100 @@ -23,7 +23,7 @@ https?:// (?:(?:www|static3)\.)?mediasetplay\.mediaset\.it/ (?: - (?:video|on-demand)/(?:[^/]+/)+[^/]+_| + (?:video|on-demand|movie)/(?:[^/]+/)+[^/]+_| player/index\.html\?.*?\bprogramGuid= ) )(?P<id>[0-9A-Z]{16,}) @@ -88,6 +88,9 @@ }, { 'url': 'https://www.mediasetplay.mediaset.it/video/grandefratellovip/benedetta-una-doccia-gelata_F309344401044C135', 'only_matching': True, + }, { + 'url': 'https://www.mediasetplay.mediaset.it/movie/herculeslaleggendahainizio/hercules-la-leggenda-ha-inizio_F305927501000102', + 'only_matching': True, }] @staticmethod diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/toggle.py new/youtube-dl/youtube_dl/extractor/toggle.py --- old/youtube-dl/youtube_dl/extractor/toggle.py 2020-11-29 07:51:55.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/toggle.py 2020-11-29 07:55:20.000000000 +0100 @@ -11,13 +11,13 @@ float_or_none, int_or_none, parse_iso8601, - sanitized_Request, + strip_or_none, ) class ToggleIE(InfoExtractor): IE_NAME = 'toggle' - _VALID_URL = r'https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}(?P<id>[0-9]+)' + _VALID_URL = r'(?:https?://(?:(?:www\.)?mewatch|video\.toggle)\.sg/(?:en|zh)/(?:[^/]+/){2,}|toggle:)(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://www.mewatch.sg/en/series/lion-moms-tif/trailers/lion-moms-premier/343115', 'info_dict': { @@ -84,28 +84,12 @@ 'only_matching': True, }] - _FORMAT_PREFERENCES = { - 'wvm-STBMain': -10, - 'wvm-iPadMain': -20, - 'wvm-iPhoneMain': -30, - 'wvm-Android': -40, - } _API_USER = 'tvpapi_147' _API_PASS = '11111' def _real_extract(self, url): video_id = self._match_id(url) - webpage = self._download_webpage( - url, video_id, note='Downloading video page') - - api_user = self._search_regex( - r'apiUser\s*:\s*(["\'])(?P<user>.+?)\1', webpage, 'apiUser', - default=self._API_USER, group='user') - api_pass = self._search_regex( - r'apiPass\s*:\s*(["\'])(?P<pass>.+?)\1', webpage, 'apiPass', - default=self._API_PASS, group='pass') - params = { 'initObj': { 'Locale': { @@ -118,17 +102,16 @@ 'SiteGuid': 0, 'DomainID': '0', 'UDID': '', - 'ApiUser': api_user, - 'ApiPass': api_pass + 'ApiUser': self._API_USER, + 'ApiPass': self._API_PASS }, 'MediaID': video_id, 'mediaType': 0, } - req = sanitized_Request( + info = self._download_json( 'http://tvpapi.as.tvinci.com/v2_9/gateways/jsonpostgw.aspx?m=GetMediaInfo', - json.dumps(params).encode('utf-8')) - info = self._download_json(req, video_id, 'Downloading video info json') + video_id, 'Downloading video info json', data=json.dumps(params).encode('utf-8')) title = info['MediaName'] @@ -141,11 +124,16 @@ vid_format = vid_format.replace(' ', '') # if geo-restricted, m3u8 is inaccessible, but mp4 is okay if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( video_url, video_id, ext='mp4', m3u8_id=vid_format, note='Downloading %s m3u8 information' % vid_format, errnote='Failed to download %s m3u8 information' % vid_format, - fatal=False)) + fatal=False) + for f in m3u8_formats: + # Apple FairPlay Streaming + if '/fpshls/' in f['url']: + continue + formats.append(f) elif ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id=vid_format, @@ -158,28 +146,21 @@ note='Downloading %s ISM manifest' % vid_format, errnote='Failed to download %s ISM manifest' % vid_format, fatal=False)) - elif ext in ('mp4', 'wvm'): - # wvm are drm-protected files + elif ext == 'mp4': formats.append({ 'ext': ext, 'url': video_url, 'format_id': vid_format, - 'preference': self._FORMAT_PREFERENCES.get(ext + '-' + vid_format) or -1, - 'format_note': 'DRM-protected video' if ext == 'wvm' else None }) if not formats: + for meta in (info.get('Metas') or []): + if meta.get('Key') == 'Encryption' and meta.get('Value') == '1': + raise ExtractorError( + 'This video is DRM protected.', expected=True) # Most likely because geo-blocked raise ExtractorError('No downloadable videos found', expected=True) self._sort_formats(formats) - duration = int_or_none(info.get('Duration')) - description = info.get('Description') - created_at = parse_iso8601(info.get('CreationDate') or None) - - average_rating = float_or_none(info.get('Rating')) - view_count = int_or_none(info.get('ViewCounter') or info.get('view_counter')) - like_count = int_or_none(info.get('LikeCounter') or info.get('like_counter')) - thumbnails = [] for picture in info.get('Pictures', []): if not isinstance(picture, dict): @@ -199,15 +180,46 @@ }) thumbnails.append(thumbnail) + def counter(prefix): + return int_or_none( + info.get(prefix + 'Counter') or info.get(prefix.lower() + '_counter')) + return { 'id': video_id, 'title': title, - 'description': description, - 'duration': duration, - 'timestamp': created_at, - 'average_rating': average_rating, - 'view_count': view_count, - 'like_count': like_count, + 'description': strip_or_none(info.get('Description')), + 'duration': int_or_none(info.get('Duration')), + 'timestamp': parse_iso8601(info.get('CreationDate') or None), + 'average_rating': float_or_none(info.get('Rating')), + 'view_count': counter('View'), + 'like_count': counter('Like'), 'thumbnails': thumbnails, 'formats': formats, } + + +class MeWatchIE(InfoExtractor): + IE_NAME = 'mewatch' + _VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[0-9a-zA-Z-]+-(?P<id>[0-9]+)' + _TESTS = [{ + 'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371', + 'info_dict': { + 'id': '1008625', + 'ext': 'mp4', + 'title': 'Recipe Of Life 味之道', + 'timestamp': 1603306526, + 'description': 'md5:6e88cde8af2068444fc8e1bc3ebf257c', + 'upload_date': '20201021', + }, + 'params': { + 'skip_download': 'm3u8 download', + }, + }] + + def _real_extract(self, url): + item_id = self._match_id(url) + custom_id = self._download_json( + 'https://cdn.mewatch.sg/api/items/' + item_id, + item_id, query={'segments': 'all'})['customId'] + return self.url_result( + 'toggle:' + custom_id, ToggleIE.ie_key(), custom_id) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/tva.py new/youtube-dl/youtube_dl/extractor/tva.py --- old/youtube-dl/youtube_dl/extractor/tva.py 2020-11-29 07:51:55.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/tva.py 2020-11-29 07:55:20.000000000 +0100 @@ -4,7 +4,9 @@ from .common import InfoExtractor from ..utils import ( float_or_none, + int_or_none, smuggle_url, + strip_or_none, ) @@ -23,7 +25,8 @@ 'params': { # m3u8 download 'skip_download': True, - } + }, + 'skip': 'HTTP Error 404: Not Found', }, { 'url': 'https://video.tva.ca/details/_5596811470001', 'only_matching': True, @@ -32,26 +35,54 @@ def _real_extract(self, url): video_id = self._match_id(url) - video_data = self._download_json( - 'https://videos.tva.ca/proxy/item/_' + video_id, video_id, headers={ - 'Accept': 'application/json', - }, query={ - 'appId': '5955fc5f23eec60006c951f1', - }) - - def get_attribute(key): - for attribute in video_data.get('attributes', []): - if attribute.get('key') == key: - return attribute.get('value') - return None return { '_type': 'url_transparent', 'id': video_id, - 'title': get_attribute('title'), 'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, {'geo_countries': ['CA']}), - 'description': get_attribute('description'), - 'thumbnail': get_attribute('image-background') or get_attribute('image-landscape'), - 'duration': float_or_none(get_attribute('video-duration'), 1000), 'ie_key': 'BrightcoveNew', } + + +class QubIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?qub\.ca/(?:[^/]+/)*[0-9a-z-]+-(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.qub.ca/tvaplus/tva/alerte-amber/saison-1/episode-01-1000036619', + 'md5': '949490fd0e7aee11d0543777611fbd53', + 'info_dict': { + 'id': '6084352463001', + 'ext': 'mp4', + 'title': 'Épisode 01', + 'uploader_id': '5481942443001', + 'upload_date': '20190907', + 'timestamp': 1567899756, + 'description': 'md5:9c0d7fbb90939420c651fd977df90145', + }, + }, { + 'url': 'https://www.qub.ca/tele/video/lcn-ca-vous-regarde-rev-30s-ap369664-1009357943', + 'only_matching': True, + }] + # reference_id also works with old account_id(5481942443001) + # BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5813221784001/default_default/index.html?videoId=ref:%s' + + def _real_extract(self, url): + entity_id = self._match_id(url) + entity = self._download_json( + 'https://www.qub.ca/proxy/pfu/content-delivery-service/v1/entities', + entity_id, query={'id': entity_id}) + video_id = entity['videoId'] + episode = strip_or_none(entity.get('name')) + + return { + '_type': 'url_transparent', + 'id': video_id, + 'title': episode, + # 'url': self.BRIGHTCOVE_URL_TEMPLATE % entity['referenceId'], + 'url': 'https://videos.tva.ca/details/_' + video_id, + 'description': entity.get('longDescription'), + 'duration': float_or_none(entity.get('durationMillis'), 1000), + 'episode': episode, + 'episode_number': int_or_none(entity.get('episodeNumber')), + # 'ie_key': 'BrightcoveNew', + 'ie_key': TVAIE.ie_key(), + } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/yandexmusic.py new/youtube-dl/youtube_dl/extractor/yandexmusic.py --- old/youtube-dl/youtube_dl/extractor/yandexmusic.py 2020-11-29 07:51:55.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/yandexmusic.py 2020-11-29 07:55:20.000000000 +0100 @@ -46,57 +46,69 @@ self._handle_error(response) return response + def _call_api(self, ep, tld, url, item_id, note, query): + return self._download_json( + 'https://music.yandex.%s/handlers/%s.jsx' % (tld, ep), + item_id, note, + fatal=False, + headers={ + 'Referer': url, + 'X-Requested-With': 'XMLHttpRequest', + 'X-Retpath-Y': url, + }, + query=query) + class YandexMusicTrackIE(YandexMusicBaseIE): IE_NAME = 'yandexmusic:track' IE_DESC = 'Яндекс.Музыка - Трек' - _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)' + _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/album/(?P<album_id>\d+)/track/(?P<id>\d+)' _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508/track/4878838', - 'md5': 'f496818aa2f60b6c0062980d2e00dc20', + 'md5': 'dec8b661f12027ceaba33318787fff76', 'info_dict': { 'id': '4878838', 'ext': 'mp3', - 'title': 'Carlo Ambrosio & Fabio Di Bari - Gypsy Eyes 1', - 'filesize': 4628061, + 'title': 'md5:c63e19341fdbe84e43425a30bc777856', + 'filesize': int, 'duration': 193.04, - 'track': 'Gypsy Eyes 1', - 'album': 'Gypsy Soul', - 'album_artist': 'Carlo Ambrosio', - 'artist': 'Carlo Ambrosio & Fabio Di Bari', + 'track': 'md5:210508c6ffdfd67a493a6c378f22c3ff', + 'album': 'md5:cd04fb13c4efeafdfa0a6a6aca36d01a', + 'album_artist': 'md5:5f54c35462c07952df33d97cfb5fc200', + 'artist': 'md5:e6fd86621825f14dc0b25db3acd68160', 'release_year': 2009, }, - 'skip': 'Travis CI servers blocked by YandexMusic', + # 'skip': 'Travis CI servers blocked by YandexMusic', }, { # multiple disks 'url': 'http://music.yandex.ru/album/3840501/track/705105', - 'md5': 'ebe7b4e2ac7ac03fe11c19727ca6153e', + 'md5': '82a54e9e787301dd45aba093cf6e58c0', 'info_dict': { 'id': '705105', 'ext': 'mp3', - 'title': 'Hooverphonic - Sometimes', - 'filesize': 5743386, + 'title': 'md5:f86d4a9188279860a83000277024c1a6', + 'filesize': int, 'duration': 239.27, - 'track': 'Sometimes', - 'album': 'The Best of Hooverphonic', - 'album_artist': 'Hooverphonic', - 'artist': 'Hooverphonic', + 'track': 'md5:40f887f0666ba1aa10b835aca44807d1', + 'album': 'md5:624f5224b14f5c88a8e812fd7fbf1873', + 'album_artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12', + 'artist': 'md5:dd35f2af4e8927100cbe6f5e62e1fb12', 'release_year': 2016, 'genre': 'pop', 'disc_number': 2, 'track_number': 9, }, - 'skip': 'Travis CI servers blocked by YandexMusic', + # 'skip': 'Travis CI servers blocked by YandexMusic', }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - album_id, track_id = mobj.group('album_id'), mobj.group('id') + tld, album_id, track_id = mobj.group('tld'), mobj.group('album_id'), mobj.group('id') - track = self._download_json( - 'http://music.yandex.ru/handlers/track.jsx?track=%s:%s' % (track_id, album_id), - track_id, 'Downloading track JSON')['track'] + track = self._call_api( + 'track', tld, url, track_id, 'Downloading track JSON', + {'track': '%s:%s' % (track_id, album_id)})['track'] track_title = track['title'] download_data = self._download_json( @@ -109,8 +121,7 @@ 'Downloading track location JSON', query={'format': 'json'}) key = hashlib.md5(('XGRlBW9FXlekgbPrRHuSiA' + fd_data['path'][1:] + fd_data['s']).encode('utf-8')).hexdigest() - storage = track['storageDir'].split('.') - f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], storage[1]) + f_url = 'http://%s/get-mp3/%s/%s?track-id=%s ' % (fd_data['host'], key, fd_data['ts'] + fd_data['path'], track['id']) thumbnail = None cover_uri = track.get('albums', [{}])[0].get('coverUri') @@ -180,42 +191,85 @@ class YandexMusicPlaylistBaseIE(YandexMusicBaseIE): + def _extract_tracks(self, source, item_id, url, tld): + tracks = source['tracks'] + track_ids = [compat_str(track_id) for track_id in source['trackIds']] + + # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks, + # missing tracks should be retrieved manually. + if len(tracks) < len(track_ids): + present_track_ids = set([ + compat_str(track['id']) + for track in tracks if track.get('id')]) + missing_track_ids = [ + track_id for track_id in track_ids + if track_id not in present_track_ids] + missing_tracks = self._call_api( + 'track-entries', tld, url, item_id, + 'Downloading missing tracks JSON', { + 'entries': ','.join(missing_track_ids), + 'lang': tld, + 'external-domain': 'music.yandex.%s' % tld, + 'overembed': 'false', + 'strict': 'true', + }) + if missing_tracks: + tracks.extend(missing_tracks) + + return tracks + def _build_playlist(self, tracks): - return [ - self.url_result( - 'http://music.yandex.ru/album/%s/track/%s' % (track['albums'][0]['id'], track['id'])) - for track in tracks if track.get('albums') and isinstance(track.get('albums'), list)] + entries = [] + for track in tracks: + track_id = track.get('id') or track.get('realId') + if not track_id: + continue + albums = track.get('albums') + if not albums or not isinstance(albums, list): + continue + album = albums[0] + if not isinstance(album, dict): + continue + album_id = album.get('id') + if not album_id: + continue + entries.append(self.url_result( + 'http://music.yandex.ru/album/%s/track/%s' % (album_id, track_id), + ie=YandexMusicTrackIE.ie_key(), video_id=track_id)) + return entries class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE): IE_NAME = 'yandexmusic:album' IE_DESC = 'Яндекс.Музыка - Альбом' - _VALID_URL = r'https?://music\.yandex\.(?:ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)' + _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/album/(?P<id>\d+)/?(\?|$)' _TESTS = [{ 'url': 'http://music.yandex.ru/album/540508', 'info_dict': { 'id': '540508', - 'title': 'Carlo Ambrosio - Gypsy Soul (2009)', + 'title': 'md5:7ed1c3567f28d14be9f61179116f5571', }, 'playlist_count': 50, - 'skip': 'Travis CI servers blocked by YandexMusic', + # 'skip': 'Travis CI servers blocked by YandexMusic', }, { 'url': 'https://music.yandex.ru/album/3840501', 'info_dict': { 'id': '3840501', - 'title': 'Hooverphonic - The Best of Hooverphonic (2016)', + 'title': 'md5:36733472cdaa7dcb1fd9473f7da8e50f', }, 'playlist_count': 33, - 'skip': 'Travis CI servers blocked by YandexMusic', + # 'skip': 'Travis CI servers blocked by YandexMusic', }] def _real_extract(self, url): - album_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + tld = mobj.group('tld') + album_id = mobj.group('id') - album = self._download_json( - 'http://music.yandex.ru/handlers/album.jsx?album=%s' % album_id, - album_id, 'Downloading album JSON') + album = self._call_api( + 'album', tld, url, album_id, 'Downloading album JSON', + {'album': album_id}) entries = self._build_playlist([track for volume in album['volumes'] for track in volume]) @@ -236,21 +290,24 @@ 'url': 'http://music.yandex.ru/users/music.partners/playlists/1245', 'info_dict': { 'id': '1245', - 'title': 'Что слушают Enter Shikari', + 'title': 'md5:841559b3fe2b998eca88d0d2e22a3097', 'description': 'md5:3b9f27b0efbe53f2ee1e844d07155cc9', }, - 'playlist_count': 6, - 'skip': 'Travis CI servers blocked by YandexMusic', + 'playlist_count': 5, + # 'skip': 'Travis CI servers blocked by YandexMusic', }, { - # playlist exceeding the limit of 150 tracks shipped with webpage (see - # https://github.com/ytdl-org/youtube-dl/issues/6666) 'url': 'https://music.yandex.ru/users/ya.playlist/playlists/1036', + 'only_matching': True, + }, { + # playlist exceeding the limit of 150 tracks (see + # https://github.com/ytdl-org/youtube-dl/issues/6666) + 'url': 'https://music.yandex.ru/users/mesiaz/playlists/1364', 'info_dict': { - 'id': '1036', - 'title': 'Музыка 90-х', + 'id': '1364', + 'title': 'md5:b3b400f997d3f878a13ae0699653f7db', }, - 'playlist_mincount': 300, - 'skip': 'Travis CI servers blocked by YandexMusic', + 'playlist_mincount': 437, + # 'skip': 'Travis CI servers blocked by YandexMusic', }] def _real_extract(self, url): @@ -259,16 +316,8 @@ user = mobj.group('user') playlist_id = mobj.group('id') - playlist = self._download_json( - 'https://music.yandex.%s/handlers/playlist.jsx' % tld, - playlist_id, 'Downloading missing tracks JSON', - fatal=False, - headers={ - 'Referer': url, - 'X-Requested-With': 'XMLHttpRequest', - 'X-Retpath-Y': url, - }, - query={ + playlist = self._call_api( + 'playlist', tld, url, playlist_id, 'Downloading playlist JSON', { 'owner': user, 'kinds': playlist_id, 'light': 'true', @@ -277,37 +326,103 @@ 'overembed': 'false', })['playlist'] - tracks = playlist['tracks'] - track_ids = [compat_str(track_id) for track_id in playlist['trackIds']] - - # tracks dictionary shipped with playlist.jsx API is limited to 150 tracks, - # missing tracks should be retrieved manually. - if len(tracks) < len(track_ids): - present_track_ids = set([ - compat_str(track['id']) - for track in tracks if track.get('id')]) - missing_track_ids = [ - track_id for track_id in track_ids - if track_id not in present_track_ids] - missing_tracks = self._download_json( - 'https://music.yandex.%s/handlers/track-entries.jsx' % tld, - playlist_id, 'Downloading missing tracks JSON', - fatal=False, - headers={ - 'Referer': url, - 'X-Requested-With': 'XMLHttpRequest', - }, - query={ - 'entries': ','.join(missing_track_ids), - 'lang': tld, - 'external-domain': 'music.yandex.%s' % tld, - 'overembed': 'false', - 'strict': 'true', - }) - if missing_tracks: - tracks.extend(missing_tracks) + tracks = self._extract_tracks(playlist, playlist_id, url, tld) return self.playlist_result( self._build_playlist(tracks), compat_str(playlist_id), playlist.get('title'), playlist.get('description')) + + +class YandexMusicArtistBaseIE(YandexMusicPlaylistBaseIE): + def _call_artist(self, tld, url, artist_id): + return self._call_api( + 'artist', tld, url, artist_id, + 'Downloading artist %s JSON' % self._ARTIST_WHAT, { + 'artist': artist_id, + 'what': self._ARTIST_WHAT, + 'sort': self._ARTIST_SORT or '', + 'dir': '', + 'period': '', + 'lang': tld, + 'external-domain': 'music.yandex.%s' % tld, + 'overembed': 'false', + }) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + tld = mobj.group('tld') + artist_id = mobj.group('id') + data = self._call_artist(tld, url, artist_id) + tracks = self._extract_tracks(data, artist_id, url, tld) + title = try_get(data, lambda x: x['artist']['name'], compat_str) + return self.playlist_result( + self._build_playlist(tracks), artist_id, title) + + +class YandexMusicArtistTracksIE(YandexMusicArtistBaseIE): + IE_NAME = 'yandexmusic:artist:tracks' + IE_DESC = 'Яндекс.Музыка - Артист - Треки' + _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/artist/(?P<id>\d+)/tracks' + + _TESTS = [{ + 'url': 'https://music.yandex.ru/artist/617526/tracks', + 'info_dict': { + 'id': '617526', + 'title': 'md5:131aef29d45fd5a965ca613e708c040b', + }, + 'playlist_count': 507, + # 'skip': 'Travis CI servers blocked by YandexMusic', + }] + + _ARTIST_SORT = '' + _ARTIST_WHAT = 'tracks' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + tld = mobj.group('tld') + artist_id = mobj.group('id') + data = self._call_artist(tld, url, artist_id) + tracks = self._extract_tracks(data, artist_id, url, tld) + artist = try_get(data, lambda x: x['artist']['name'], compat_str) + title = '%s - %s' % (artist or artist_id, 'Треки') + return self.playlist_result( + self._build_playlist(tracks), artist_id, title) + + +class YandexMusicArtistAlbumsIE(YandexMusicArtistBaseIE): + IE_NAME = 'yandexmusic:artist:albums' + IE_DESC = 'Яндекс.Музыка - Артист - Альбомы' + _VALID_URL = r'https?://music\.yandex\.(?P<tld>ru|kz|ua|by)/artist/(?P<id>\d+)/albums' + + _TESTS = [{ + 'url': 'https://music.yandex.ru/artist/617526/albums', + 'info_dict': { + 'id': '617526', + 'title': 'md5:55dc58d5c85699b7fb41ee926700236c', + }, + 'playlist_count': 8, + # 'skip': 'Travis CI servers blocked by YandexMusic', + }] + + _ARTIST_SORT = 'year' + _ARTIST_WHAT = 'albums' + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + tld = mobj.group('tld') + artist_id = mobj.group('id') + data = self._call_artist(tld, url, artist_id) + entries = [] + for album in data['albums']: + if not isinstance(album, dict): + continue + album_id = album.get('id') + if not album_id: + continue + entries.append(self.url_result( + 'http://music.yandex.ru/album/%s' % album_id, + ie=YandexMusicAlbumIE.ie_key(), video_id=album_id)) + artist = try_get(data, lambda x: x['artist']['name'], compat_str) + title = '%s - %s' % (artist or artist_id, 'Альбомы') + return self.playlist_result(entries, artist_id, title) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/youtube.py new/youtube-dl/youtube_dl/extractor/youtube.py --- old/youtube-dl/youtube_dl/extractor/youtube.py 2020-11-29 07:52:00.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/youtube.py 2020-11-29 07:55:20.000000000 +0100 @@ -602,7 +602,7 @@ 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html', } }, - # Normal age-gate video (No vevo, embed allowed) + # Normal age-gate video (No vevo, embed allowed), available via embed page { 'url': 'https://youtube.com/watch?v=HtVdAasjOgU', 'info_dict': { @@ -618,6 +618,12 @@ 'age_limit': 18, }, }, + { + # Age-gated video only available with authentication (unavailable + # via embed page workaround) + 'url': 'XgnwCQzjau8', + 'only_matching': True, + }, # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421) # YouTube Red ad is not captured for creator { @@ -1637,8 +1643,8 @@ # Get video info video_info = {} embed_webpage = None - if (self._og_search_property('restrictions:age', video_webpage, default=None) == '18+' - or re.search(r'player-age-gate-content">', video_webpage) is not None): + + if re.search(r'["\']status["\']\s*:\s*["\']LOGIN_REQUIRED', video_webpage) is not None: age_gate = True # We simulate the access to the video from www.youtube.com/v/{video_id} # this can be viewed without login into Youtube @@ -2790,12 +2796,17 @@ # TODO pass - def _shelf_entries(self, shelf_renderer): + def _shelf_entries(self, shelf_renderer, skip_channels=False): ep = try_get( shelf_renderer, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], compat_str) shelf_url = urljoin('https://www.youtube.com', ep) if shelf_url: + # Skipping links to another channels, note that checking for + # endpoint.commandMetadata.webCommandMetadata.webPageTypwebPageType == WEB_PAGE_TYPE_CHANNEL + # will not work + if skip_channels and '/channels?' in shelf_url: + return title = try_get( shelf_renderer, lambda x: x['title']['runs'][0]['text'], compat_str) yield self.url_result(shelf_url, video_title=title) @@ -2906,9 +2917,13 @@ } def _entries(self, tab, identity_token): - slr_renderer = try_get(tab, lambda x: x['sectionListRenderer'], dict) + tab_content = try_get(tab, lambda x: x['content'], dict) + if not tab_content: + return + slr_renderer = try_get(tab_content, lambda x: x['sectionListRenderer'], dict) if not slr_renderer: return + is_channels_tab = tab.get('title') == 'Channels' continuation = None slr_contents = try_get(slr_renderer, lambda x: x['contents'], list) or [] for slr_content in slr_contents: @@ -2935,7 +2950,7 @@ continue renderer = isr_content.get('shelfRenderer') if renderer: - for entry in self._shelf_entries(renderer): + for entry in self._shelf_entries(renderer, not is_channels_tab): yield entry continue renderer = isr_content.get('backstagePostThreadRenderer') @@ -3065,7 +3080,7 @@ description = None playlist_id = item_id playlist = self.playlist_result( - self._entries(selected_tab['content'], identity_token), + self._entries(selected_tab, identity_token), playlist_id=playlist_id, playlist_title=title, playlist_description=description) playlist.update(self._extract_uploader(data)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/version.py new/youtube-dl/youtube_dl/version.py --- old/youtube-dl/youtube_dl/version.py 2020-11-29 07:52:58.000000000 +0100 +++ new/youtube-dl/youtube_dl/version.py 2020-12-01 19:37:37.000000000 +0100 @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.11.29' +__version__ = '2020.12.02' _______________________________________________ openSUSE Commits mailing list -- [email protected] To unsubscribe, email [email protected] List Netiquette: https://en.opensuse.org/openSUSE:Mailing_list_netiquette List Archives: https://lists.opensuse.org/archives/list/[email protected]
