Hello community, here is the log from the commit of package youtube-dl for openSUSE:Factory checked in at 2020-01-08 17:55:49 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old) and /work/SRC/openSUSE:Factory/.youtube-dl.new.6675 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "youtube-dl" Wed Jan 8 17:55:49 2020 rev:123 rq:761709 version:2020.01.01 Changes: -------- --- /work/SRC/openSUSE:Factory/youtube-dl/python-youtube-dl.changes 2019-12-25 10:55:13.957637412 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.6675/python-youtube-dl.changes 2020-01-08 17:56:10.629692170 +0100 @@ -1,0 +2,11 @@ +Tue Dec 31 23:48:47 UTC 2019 - Jan Engelhardt <[email protected]> + +- Update to release 2020.01.01 + * soundcloud: fix client id extraction for non fatal requests + * prosiebensat1: improve geo restriction handling + * brightcove: cache brightcove player policy keys + * scrippsnetworks: Add new extractor + * mitele: fix extraction + * soundcloud: Update client id + +------------------------------------------------------------------- youtube-dl.changes: same change Old: ---- youtube-dl-2019.12.25.tar.gz youtube-dl-2019.12.25.tar.gz.sig New: ---- youtube-dl-2020.01.01.tar.gz youtube-dl-2020.01.01.tar.gz.sig ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.JPLToA/_old 2020-01-08 17:56:12.813693319 +0100 +++ /var/tmp/diff_new_pack.JPLToA/_new 2020-01-08 17:56:12.817693321 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-youtube-dl # -# Copyright (c) 2019 SUSE LLC +# Copyright (c) 2020 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -19,12 +19,13 @@ %define modname youtube-dl %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-youtube-dl -Version: 2019.12.25 +Version: 2020.01.01 Release: 0 Summary: A Python module for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 Group: Development/Languages/Python -URL: http://rg3.github.io/youtube-dl/ +URL: https://yt-dl.org/ +#Git-Clone: https://github.com/ytdl-org/youtube-dl Source: http://youtube-dl.org/downloads/%version/%modname-%version.tar.gz Source2: http://youtube-dl.org/downloads/%version/%modname-%version.tar.gz.sig Source3: %modname.keyring ++++++ youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.JPLToA/_old 2020-01-08 17:56:12.837693332 +0100 +++ /var/tmp/diff_new_pack.JPLToA/_new 2020-01-08 17:56:12.841693333 +0100 @@ -1,7 +1,7 @@ # # spec file for package youtube-dl # -# Copyright (c) 2019 SUSE LLC +# Copyright (c) 2020 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,7 +17,7 @@ Name: youtube-dl -Version: 2019.12.25 +Version: 2020.01.01 Release: 0 Summary: A tool for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl-2019.12.25.tar.gz -> youtube-dl-2020.01.01.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/ChangeLog new/youtube-dl/ChangeLog --- old/youtube-dl/ChangeLog 2019-12-24 19:16:45.000000000 +0100 +++ new/youtube-dl/ChangeLog 2019-12-31 23:24:55.000000000 +0100 @@ -1,3 +1,24 @@ +version 2020.01.01 + +Extractors +* [brightcove] Invalidate policy key cache on failing requests +* [pornhub] Improve locked videos detection (#22449, #22780) ++ [pornhub] Add support for m3u8 formats +* [pornhub] Fix extraction (#22749, #23082) +* [brightcove] Update policy key on failing requests +* [spankbang] Improve removed video detection (#23423) +* [spankbang] Fix extraction (#23307, #23423, #23444) +* [soundcloud] Automatically update client id on failing requests +* [prosiebensat1] Improve geo restriction handling (#23571) +* [brightcove] Cache brightcove player policy keys +* [teachable] Fail with error message if no video URL found +* [teachable] Improve locked lessons detection (#23528) ++ [scrippsnetworks] Add support for Scripps Networks sites (#19857, #22981) +* [mitele] Fix extraction (#21354, #23456) +* [soundcloud] Update client id (#23516) +* [mailru] Relax URL regular expressions (#23509) + + version 2019.12.25 Core diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/devscripts/create-github-release.py new/youtube-dl/devscripts/create-github-release.py --- old/youtube-dl/devscripts/create-github-release.py 2019-12-24 19:16:18.000000000 +0100 +++ new/youtube-dl/devscripts/create-github-release.py 2019-12-31 23:24:34.000000000 +0100 @@ -15,7 +15,6 @@ from youtube_dl.compat import ( compat_basestring, - compat_input, compat_getpass, compat_print, compat_urllib_request, @@ -40,28 +39,20 @@ try: info = netrc.netrc().authenticators(self._NETRC_MACHINE) if info is not None: - self._username = info[0] - self._password = info[2] + self._token = info[2] compat_print('Using GitHub credentials found in .netrc...') return else: compat_print('No GitHub credentials found in .netrc') except (IOError, netrc.NetrcParseError): compat_print('Unable to parse .netrc') - self._username = compat_input( - 'Type your GitHub username or email address and press [Return]: ') - self._password = compat_getpass( - 'Type your GitHub password and press [Return]: ') + self._token = compat_getpass( + 'Type your GitHub PAT (personal access token) and press [Return]: ') def _call(self, req): if isinstance(req, compat_basestring): req = sanitized_Request(req) - # Authorizing manually since GitHub does not response with 401 with - # WWW-Authenticate header set (see - # https://developer.github.com/v3/#basic-authentication) - b64 = base64.b64encode( - ('%s:%s' % (self._username, self._password)).encode('utf-8')).decode('ascii') - req.add_header('Authorization', 'Basic %s' % b64) + req.add_header('Authorization', 'token %s' % self._token) response = self._opener.open(req).read().decode('utf-8') return json.loads(response) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/docs/supportedsites.md new/youtube-dl/docs/supportedsites.md --- old/youtube-dl/docs/supportedsites.md 2019-12-24 19:16:48.000000000 +0100 +++ new/youtube-dl/docs/supportedsites.md 2019-12-31 23:24:58.000000000 +0100 @@ -761,6 +761,7 @@ - **screen.yahoo:search**: Yahoo screen search - **Screencast** - **ScreencastOMatic** + - **ScrippsNetworks** - **scrippsnetworks:watch** - **SCTE** - **SCTECourse** Binary files old/youtube-dl/youtube-dl and new/youtube-dl/youtube-dl differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/brightcove.py new/youtube-dl/youtube_dl/extractor/brightcove.py --- old/youtube-dl/youtube_dl/extractor/brightcove.py 2019-12-24 19:16:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/brightcove.py 2019-12-31 23:24:34.000000000 +0100 @@ -586,45 +586,63 @@ account_id, player_id, embed, content_type, video_id = re.match(self._VALID_URL, url).groups() - webpage = self._download_webpage( - 'http://players.brightcove.net/%s/%s_%s/index.min.js' - % (account_id, player_id, embed), video_id) - - policy_key = None - - catalog = self._search_regex( - r'catalog\(({.+?})\);', webpage, 'catalog', default=None) - if catalog: - catalog = self._parse_json( - js_to_json(catalog), video_id, fatal=False) + policy_key_id = '%s_%s' % (account_id, player_id) + policy_key = self._downloader.cache.load('brightcove', policy_key_id) + policy_key_extracted = False + store_pk = lambda x: self._downloader.cache.store('brightcove', policy_key_id, x) + + def extract_policy_key(): + webpage = self._download_webpage( + 'http://players.brightcove.net/%s/%s_%s/index.min.js' + % (account_id, player_id, embed), video_id) + + policy_key = None + + catalog = self._search_regex( + r'catalog\(({.+?})\);', webpage, 'catalog', default=None) if catalog: - policy_key = catalog.get('policyKey') + catalog = self._parse_json( + js_to_json(catalog), video_id, fatal=False) + if catalog: + policy_key = catalog.get('policyKey') + + if not policy_key: + policy_key = self._search_regex( + r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', + webpage, 'policy key', group='pk') - if not policy_key: - policy_key = self._search_regex( - r'policyKey\s*:\s*(["\'])(?P<pk>.+?)\1', - webpage, 'policy key', group='pk') + store_pk(policy_key) + return policy_key api_url = 'https://edge.api.brightcove.com/playback/v1/accounts/%s/%ss/%s' % (account_id, content_type, video_id) - headers = { - 'Accept': 'application/json;pk=%s' % policy_key, - } + headers = {} referrer = smuggled_data.get('referrer') if referrer: headers.update({ 'Referer': referrer, 'Origin': re.search(r'https?://[^/]+', referrer).group(0), }) - try: - json_data = self._download_json(api_url, video_id, headers=headers) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: - json_data = self._parse_json(e.cause.read().decode(), video_id)[0] - message = json_data.get('message') or json_data['error_code'] - if json_data.get('error_subcode') == 'CLIENT_GEO': - self.raise_geo_restricted(msg=message) - raise ExtractorError(message, expected=True) - raise + + for _ in range(2): + if not policy_key: + policy_key = extract_policy_key() + policy_key_extracted = True + headers['Accept'] = 'application/json;pk=%s' % policy_key + try: + json_data = self._download_json(api_url, video_id, headers=headers) + break + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (401, 403): + json_data = self._parse_json(e.cause.read().decode(), video_id)[0] + message = json_data.get('message') or json_data['error_code'] + if json_data.get('error_subcode') == 'CLIENT_GEO': + self.raise_geo_restricted(msg=message) + elif json_data.get('error_code') == 'INVALID_POLICY_KEY' and not policy_key_extracted: + policy_key = None + store_pk(None) + continue + raise ExtractorError(message, expected=True) + raise errors = json_data.get('errors') if errors and errors[0].get('error_subcode') == 'TVE_AUTH': diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/extractors.py new/youtube-dl/youtube_dl/extractor/extractors.py --- old/youtube-dl/youtube_dl/extractor/extractors.py 2019-12-24 19:16:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/extractors.py 2019-12-31 23:24:34.000000000 +0100 @@ -963,7 +963,10 @@ from .sbs import SBSIE from .screencast import ScreencastIE from .screencastomatic import ScreencastOMaticIE -from .scrippsnetworks import ScrippsNetworksWatchIE +from .scrippsnetworks import ( + ScrippsNetworksWatchIE, + ScrippsNetworksIE, +) from .scte import ( SCTEIE, SCTECourseIE, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/mailru.py new/youtube-dl/youtube_dl/extractor/mailru.py --- old/youtube-dl/youtube_dl/extractor/mailru.py 2019-12-24 19:16:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/mailru.py 2019-12-31 23:24:34.000000000 +0100 @@ -20,10 +20,10 @@ IE_DESC = 'Видео@Mail.Ru' _VALID_URL = r'''(?x) https?:// - (?:(?:www|m)\.)?my\.mail\.ru/ + (?:(?:www|m)\.)?my\.mail\.ru/+ (?: video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)| - (?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html| + (?:(?P<idv2prefix>(?:[^/]+/+){2})video/(?P<idv2suffix>[^/]+/\d+))\.html| (?:video/embed|\+/video/meta)/(?P<metaid>\d+) ) ''' @@ -85,6 +85,14 @@ { 'url': 'http://my.mail.ru/+/video/meta/7949340477499637815', 'only_matching': True, + }, + { + 'url': 'https://my.mail.ru//list/sinyutin10/video/_myvideo/4.html', + 'only_matching': True, + }, + { + 'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html', + 'only_matching': True, } ] @@ -237,7 +245,7 @@ class MailRuMusicIE(MailRuMusicSearchBaseIE): IE_NAME = 'mailru:music' IE_DESC = 'Музыка@Mail.Ru' - _VALID_URL = r'https?://my\.mail\.ru/music/songs/[^/?#&]+-(?P<id>[\da-f]+)' + _VALID_URL = r'https?://my\.mail\.ru/+music/+songs/+[^/?#&]+-(?P<id>[\da-f]+)' _TESTS = [{ 'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893', 'md5': '0f8c22ef8c5d665b13ac709e63025610', @@ -273,7 +281,7 @@ class MailRuMusicSearchIE(MailRuMusicSearchBaseIE): IE_NAME = 'mailru:music:search' IE_DESC = 'Музыка@Mail.Ru' - _VALID_URL = r'https?://my\.mail\.ru/music/search/(?P<id>[^/?#&]+)' + _VALID_URL = r'https?://my\.mail\.ru/+music/+search/+(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://my.mail.ru/music/search/black%20shadow', 'info_dict': { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/mitele.py new/youtube-dl/youtube_dl/extractor/mitele.py --- old/youtube-dl/youtube_dl/extractor/mitele.py 2019-12-24 19:16:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/mitele.py 2019-12-31 23:24:34.000000000 +0100 @@ -4,8 +4,8 @@ from .common import InfoExtractor from ..utils import ( int_or_none, + parse_iso8601, smuggle_url, - parse_duration, ) @@ -18,16 +18,18 @@ 'info_dict': { 'id': 'FhYW1iNTE6J6H7NkQRIEzfne6t2quqPg', 'ext': 'mp4', - 'title': 'Tor, la web invisible', - 'description': 'md5:3b6fce7eaa41b2d97358726378d9369f', + 'title': 'Diario de La redacción Programa 144', + 'description': 'md5:07c35a7b11abb05876a6a79185b58d27', 'series': 'Diario de', - 'season': 'La redacción', + 'season': 'Season 14', 'season_number': 14, - 'season_id': 'diario_de_t14_11981', - 'episode': 'Programa 144', + 'episode': 'Tor, la web invisible', 'episode_number': 3, 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'duration': 2913, + 'age_limit': 16, + 'timestamp': 1471209401, + 'upload_date': '20160814', }, 'add_ie': ['Ooyala'], }, { @@ -39,13 +41,15 @@ 'title': 'Cuarto Milenio Temporada 6 Programa 226', 'description': 'md5:5ff132013f0cd968ffbf1f5f3538a65f', 'series': 'Cuarto Milenio', - 'season': 'Temporada 6', + 'season': 'Season 6', 'season_number': 6, - 'season_id': 'cuarto_milenio_t06_12715', - 'episode': 'Programa 226', + 'episode': 'Episode 24', 'episode_number': 24, 'thumbnail': r're:(?i)^https?://.*\.jpg$', 'duration': 7313, + 'age_limit': 12, + 'timestamp': 1471209021, + 'upload_date': '20160814', }, 'params': { 'skip_download': True, @@ -54,67 +58,36 @@ }, { 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', 'only_matching': True, + }, { + 'url': 'https://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144-40_1006364575251/player/', + 'only_matching': True, }] def _real_extract(self, url): - video_id = self._match_id(url) - - paths = self._download_json( - 'https://www.mitele.es/amd/agp/web/metadata/general_configuration', - video_id, 'Downloading paths JSON') - - ooyala_s = paths['general_configuration']['api_configuration']['ooyala_search'] - base_url = ooyala_s.get('base_url', 'cdn-search-mediaset.carbyne.ps.ooyala.com') - full_path = ooyala_s.get('full_path', '/search/v1/full/providers/') - source = self._download_json( - '%s://%s%s%s/docs/%s' % ( - ooyala_s.get('protocol', 'https'), base_url, full_path, - ooyala_s.get('provider_id', '104951'), video_id), - video_id, 'Downloading data JSON', query={ - 'include_titles': 'Series,Season', - 'product_name': ooyala_s.get('product_name', 'test'), - 'format': 'full', - })['hits']['hits'][0]['_source'] - - embedCode = source['offers'][0]['embed_codes'][0] - titles = source['localizable_titles'][0] - - title = titles.get('title_medium') or titles['title_long'] - - description = titles.get('summary_long') or titles.get('summary_medium') - - def get(key1, key2): - value1 = source.get(key1) - if not value1 or not isinstance(value1, list): - return - if not isinstance(value1[0], dict): - return - return value1[0].get(key2) - - series = get('localizable_titles_series', 'title_medium') - - season = get('localizable_titles_season', 'title_medium') - season_number = int_or_none(source.get('season_number')) - season_id = source.get('season_id') - - episode = titles.get('title_sort_name') - episode_number = int_or_none(source.get('episode_number')) - - duration = parse_duration(get('videos', 'duration')) + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + pre_player = self._parse_json(self._search_regex( + r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})', + webpage, 'Pre Player'), display_id)['prePlayer'] + title = pre_player['title'] + video = pre_player['video'] + video_id = video['dataMediaId'] + content = pre_player.get('content') or {} + info = content.get('info') or {} return { '_type': 'url_transparent', # for some reason only HLS is supported - 'url': smuggle_url('ooyala:' + embedCode, {'supportedformats': 'm3u8,dash'}), + 'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}), 'id': video_id, 'title': title, - 'description': description, - 'series': series, - 'season': season, - 'season_number': season_number, - 'season_id': season_id, - 'episode': episode, - 'episode_number': episode_number, - 'duration': duration, - 'thumbnail': get('images', 'url'), + 'description': info.get('synopsis'), + 'series': content.get('title'), + 'season_number': int_or_none(info.get('season_number')), + 'episode': content.get('subtitle'), + 'episode_number': int_or_none(info.get('episode_number')), + 'duration': int_or_none(info.get('duration')), + 'thumbnail': video.get('dataPoster'), + 'age_limit': int_or_none(info.get('rating')), + 'timestamp': parse_iso8601(pre_player.get('publishedTime')), } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/pornhub.py new/youtube-dl/youtube_dl/extractor/pornhub.py --- old/youtube-dl/youtube_dl/extractor/pornhub.py 2019-12-24 19:16:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/pornhub.py 2019-12-31 23:24:34.000000000 +0100 @@ -17,6 +17,7 @@ determine_ext, ExtractorError, int_or_none, + NO_DEFAULT, orderedSet, remove_quotes, str_to_int, @@ -227,12 +228,13 @@ else: thumbnail, duration = [None] * 2 - if not video_urls: - tv_webpage = dl_webpage('tv') - + def extract_js_vars(webpage, pattern, default=NO_DEFAULT): assignments = self._search_regex( - r'(var.+?mediastring.+?)</script>', tv_webpage, - 'encoded url').split(';') + pattern, webpage, 'encoded url', default=default) + if not assignments: + return {} + + assignments = assignments.split(';') js_vars = {} @@ -254,11 +256,35 @@ assn = re.sub(r'var\s+', '', assn) vname, value = assn.split('=', 1) js_vars[vname] = parse_js_value(value) + return js_vars - video_url = js_vars['mediastring'] - if video_url not in video_urls_set: - video_urls.append((video_url, None)) - video_urls_set.add(video_url) + def add_video_url(video_url): + v_url = url_or_none(video_url) + if not v_url: + return + if v_url in video_urls_set: + return + video_urls.append((v_url, None)) + video_urls_set.add(v_url) + + if not video_urls: + FORMAT_PREFIXES = ('media', 'quality') + js_vars = extract_js_vars( + webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES), + default=None) + if js_vars: + for key, format_url in js_vars.items(): + if any(key.startswith(p) for p in FORMAT_PREFIXES): + add_video_url(format_url) + if not video_urls and re.search( + r'<[^>]+\bid=["\']lockedPlayer', webpage): + raise ExtractorError( + 'Video %s is locked' % video_id, expected=True) + + if not video_urls: + js_vars = extract_js_vars( + dl_webpage('tv'), r'(var.+?mediastring.+?)</script>') + add_video_url(js_vars['mediastring']) for mobj in re.finditer( r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1', @@ -276,10 +302,16 @@ r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) if upload_date: upload_date = upload_date.replace('/', '') - if determine_ext(video_url) == 'mpd': + ext = determine_ext(video_url) + if ext == 'mpd': formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) continue + elif ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + video_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + continue tbr = None mobj = re.search(r'(?P<height>\d+)[pP]?_(?P<tbr>\d+)[kK]', video_url) if mobj: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/prosiebensat1.py new/youtube-dl/youtube_dl/extractor/prosiebensat1.py --- old/youtube-dl/youtube_dl/extractor/prosiebensat1.py 2019-12-24 19:16:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/prosiebensat1.py 2019-12-31 23:24:34.000000000 +0100 @@ -16,7 +16,7 @@ class ProSiebenSat1BaseIE(InfoExtractor): - _GEO_COUNTRIES = ['DE'] + _GEO_BYPASS = False _ACCESS_ID = None _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear' _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get' @@ -39,14 +39,18 @@ formats = [] if self._ACCESS_ID: raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID - server_token = (self._download_json( + protocols = self._download_json( self._V4_BASE_URL + 'protocols', clip_id, 'Downloading protocols JSON', headers=self.geo_verification_headers(), query={ 'access_id': self._ACCESS_ID, 'client_token': sha1((raw_ct).encode()).hexdigest(), 'video_id': clip_id, - }, fatal=False) or {}).get('server_token') + }, fatal=False, expected_status=(403,)) or {} + error = protocols.get('error') or {} + if error.get('title') == 'Geo check failed': + self.raise_geo_restricted(countries=['AT', 'CH', 'DE']) + server_token = protocols.get('server_token') if server_token: urls = (self._download_json( self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/scrippsnetworks.py new/youtube-dl/youtube_dl/extractor/scrippsnetworks.py --- old/youtube-dl/youtube_dl/extractor/scrippsnetworks.py 2019-12-24 19:16:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/scrippsnetworks.py 2019-12-31 23:24:34.000000000 +0100 @@ -7,6 +7,7 @@ from .aws import AWSIE from .anvato import AnvatoIE +from .common import InfoExtractor from ..utils import ( smuggle_url, urlencode_postdata, @@ -102,3 +103,46 @@ 'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id, {'geo_countries': ['US']}), AnvatoIE.ie_key(), video_id=mcp_id) + + +class ScrippsNetworksIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)' + _TESTS = [{ + 'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338', + 'info_dict': { + 'id': '0260338', + 'ext': 'mp4', + 'title': 'The Best of the Best', + 'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.', + 'timestamp': 1475678834, + 'upload_date': '20161005', + 'uploader': 'SCNI-SCND', + }, + 'add_ie': ['ThePlatform'], + }, { + 'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790', + 'only_matching': True, + }, { + 'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591', + 'only_matching': True, + }, { + 'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929', + 'only_matching': True, + }, { + 'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184', + 'only_matching': True, + }] + _ACCOUNT_MAP = { + 'cookingchanneltv': 2433005105, + 'diynetwork': 2433004575, + 'foodnetwork': 2433005105, + 'hgtv': 2433004575, + 'travelchannel': 2433005739, + } + _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true' + + def _real_extract(self, url): + site, guid = re.match(self._VALID_URL, url).groups() + return self.url_result(smuggle_url( + self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid), + {'force_smil_url': True}), 'ThePlatform', guid) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/soundcloud.py new/youtube-dl/youtube_dl/extractor/soundcloud.py --- old/youtube-dl/youtube_dl/extractor/soundcloud.py 2019-12-24 19:16:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/soundcloud.py 2019-12-31 23:24:34.000000000 +0100 @@ -9,10 +9,13 @@ SearchInfoExtractor ) from ..compat import ( + compat_HTTPError, + compat_kwargs, compat_str, compat_urlparse, ) from ..utils import ( + error_to_compat_str, ExtractorError, float_or_none, HEADRequest, @@ -255,7 +258,6 @@ _API_BASE = 'https://api.soundcloud.com/' _API_V2_BASE = 'https://api-v2.soundcloud.com/' _BASE_URL = 'https://soundcloud.com/' - _CLIENT_ID = 'UW9ajvMgVdMMW3cdeBi8lPfN6dvOVGji' _IMAGE_REPL_RE = r'-([0-9a-z]+)\.jpg' _ARTWORK_MAP = { @@ -271,9 +273,49 @@ 'original': 0, } + def _store_client_id(self, client_id): + self._downloader.cache.store('soundcloud', 'client_id', client_id) + + def _update_client_id(self): + webpage = self._download_webpage('https://soundcloud.com/', None) + for src in reversed(re.findall(r'<script[^>]+src="([^"]+)"', webpage)): + script = self._download_webpage(src, None, fatal=False) + if script: + client_id = self._search_regex( + r'client_id\s*:\s*"([0-9a-zA-Z]{32})"', + script, 'client id', default=None) + if client_id: + self._CLIENT_ID = client_id + self._store_client_id(client_id) + return + raise ExtractorError('Unable to extract client id') + + def _download_json(self, *args, **kwargs): + non_fatal = kwargs.get('fatal') is False + if non_fatal: + del kwargs['fatal'] + query = kwargs.get('query', {}).copy() + for _ in range(2): + query['client_id'] = self._CLIENT_ID + kwargs['query'] = query + try: + return super(SoundcloudIE, self)._download_json(*args, **compat_kwargs(kwargs)) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: + self._store_client_id(None) + self._update_client_id() + continue + elif non_fatal: + self._downloader.report_warning(error_to_compat_str(e)) + return False + raise + + def _real_initialize(self): + self._CLIENT_ID = self._downloader.cache.load('soundcloud', 'client_id') or 'YUKXoArFcqrlQn9tfNHvvyfnDISj04zk' + @classmethod def _resolv_url(cls, url): - return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url + '&client_id=' + cls._CLIENT_ID + return SoundcloudIE._API_V2_BASE + 'resolve?url=' + url def _extract_info_dict(self, info, full_title=None, secret_token=None, version=2): track_id = compat_str(info['id']) @@ -451,9 +493,7 @@ track_id = mobj.group('track_id') - query = { - 'client_id': self._CLIENT_ID, - } + query = {} if track_id: info_json_url = self._API_V2_BASE + 'tracks/' + track_id full_title = track_id @@ -536,7 +576,6 @@ def _extract_playlist(self, base_url, playlist_id, playlist_title): COMMON_QUERY = { 'limit': 2000000000, - 'client_id': self._CLIENT_ID, 'linked_partitioning': '1', } @@ -722,9 +761,7 @@ mobj = re.match(self._VALID_URL, url) playlist_id = mobj.group('id') - query = { - 'client_id': self._CLIENT_ID, - } + query = {} token = mobj.group('token') if token: query['secret_token'] = token @@ -761,7 +798,6 @@ self._MAX_RESULTS_PER_PAGE) query.update({ 'limit': limit, - 'client_id': self._CLIENT_ID, 'linked_partitioning': 1, 'offset': 0, }) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/spankbang.py new/youtube-dl/youtube_dl/extractor/spankbang.py --- old/youtube-dl/youtube_dl/extractor/spankbang.py 2019-12-24 19:16:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/spankbang.py 2019-12-31 23:24:34.000000000 +0100 @@ -4,6 +4,7 @@ from .common import InfoExtractor from ..utils import ( + determine_ext, ExtractorError, merge_dicts, orderedSet, @@ -64,7 +65,7 @@ url.replace('/%s/embed' % video_id, '/%s/video' % video_id), video_id, headers={'Cookie': 'country=US'}) - if re.search(r'<[^>]+\bid=["\']video_removed', webpage): + if re.search(r'<[^>]+\b(?:id|class)=["\']video_removed', webpage): raise ExtractorError( 'Video %s is not available' % video_id, expected=True) @@ -75,11 +76,20 @@ if not f_url: return f = parse_resolution(format_id) - f.update({ - 'url': f_url, - 'format_id': format_id, - }) - formats.append(f) + ext = determine_ext(f_url) + if format_id.startswith('m3u8') or ext == 'm3u8': + formats.extend(self._extract_m3u8_formats( + f_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) + elif format_id.startswith('mpd') or ext == 'mpd': + formats.extend(self._extract_mpd_formats( + f_url, video_id, mpd_id='dash', fatal=False)) + elif ext == 'mp4' or f.get('width') or f.get('height'): + f.update({ + 'url': f_url, + 'format_id': format_id, + }) + formats.append(f) STREAM_URL_PREFIX = 'stream_url_' @@ -93,28 +103,22 @@ r'data-streamkey\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'stream key', group='value') - sb_csrf_session = self._get_cookies( - 'https://spankbang.com')['sb_csrf_session'].value - stream = self._download_json( 'https://spankbang.com/api/videos/stream', video_id, 'Downloading stream JSON', data=urlencode_postdata({ 'id': stream_key, 'data': 0, - 'sb_csrf_session': sb_csrf_session, }), headers={ 'Referer': url, - 'X-CSRFToken': sb_csrf_session, + 'X-Requested-With': 'XMLHttpRequest', }) for format_id, format_url in stream.items(): - if format_id.startswith(STREAM_URL_PREFIX): - if format_url and isinstance(format_url, list): - format_url = format_url[0] - extract_format( - format_id[len(STREAM_URL_PREFIX):], format_url) + if format_url and isinstance(format_url, list): + format_url = format_url[0] + extract_format(format_id, format_url) - self._sort_formats(formats) + self._sort_formats(formats, field_preference=('preference', 'height', 'width', 'fps', 'tbr', 'format_id')) info = self._search_json_ld(webpage, video_id, default={}) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/teachable.py new/youtube-dl/youtube_dl/extractor/teachable.py --- old/youtube-dl/youtube_dl/extractor/teachable.py 2019-12-24 19:16:18.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/teachable.py 2019-12-31 23:24:34.000000000 +0100 @@ -165,8 +165,12 @@ if any(re.search(p, webpage) for p in ( r'class=["\']lecture-contents-locked', r'>\s*Lecture contents locked', - r'id=["\']lecture-locked')): + r'id=["\']lecture-locked', + # https://academy.tailoredtutors.co.uk/courses/108779/lectures/1955313 + r'class=["\'](?:inner-)?lesson-locked', + r'>LESSON LOCKED<')): self.raise_login_required('Lecture contents locked') + raise ExtractorError('Unable to find video URL') title = self._og_search_title(webpage, default=None) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/version.py new/youtube-dl/youtube_dl/version.py --- old/youtube-dl/youtube_dl/version.py 2019-12-24 19:16:45.000000000 +0100 +++ new/youtube-dl/youtube_dl/version.py 2019-12-31 23:24:55.000000000 +0100 @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2019.12.25' +__version__ = '2020.01.01'
