Hello community, here is the log from the commit of package youtube-dl for openSUSE:Factory checked in at 2020-12-05 20:37:05 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old) and /work/SRC/openSUSE:Factory/.youtube-dl.new.5913 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "youtube-dl" Sat Dec 5 20:37:05 2020 rev:148 rq:853188 version:2020.12.05 Changes: -------- --- /work/SRC/openSUSE:Factory/youtube-dl/python-youtube-dl.changes 2020-12-02 13:58:59.845850204 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.5913/python-youtube-dl.changes 2020-12-05 20:37:59.290757840 +0100 @@ -1,0 +2,8 @@ +Fri Dec 4 20:57:46 UTC 2020 - Jan Engelhardt <[email protected]> + +- Update to release 2020.12.05 + * gamespot: Extract DASH and HTTP formats + * zdf: extract webm formats + * nrk: improve format extraction and geo-restriction detection + +------------------------------------------------------------------- youtube-dl.changes: same change Old: ---- youtube-dl-2020.12.02.tar.gz youtube-dl-2020.12.02.tar.gz.sig New: ---- youtube-dl-2020.12.05.tar.gz youtube-dl-2020.12.05.tar.gz.sig ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.3kVZoL/_old 2020-12-05 20:38:00.198758762 +0100 +++ /var/tmp/diff_new_pack.3kVZoL/_new 2020-12-05 20:38:00.202758766 +0100 @@ -19,7 +19,7 @@ %define modname youtube-dl %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-youtube-dl -Version: 2020.12.02 +Version: 2020.12.05 Release: 0 Summary: A Python module for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.3kVZoL/_old 2020-12-05 20:38:00.230758795 +0100 +++ /var/tmp/diff_new_pack.3kVZoL/_new 2020-12-05 20:38:00.234758798 +0100 @@ -17,7 +17,7 @@ Name: youtube-dl -Version: 2020.12.02 +Version: 2020.12.05 Release: 0 Summary: A tool for downloading from video sites for offline watching License: SUSE-Public-Domain AND CC-BY-SA-3.0 ++++++ youtube-dl-2020.12.02.tar.gz -> youtube-dl-2020.12.05.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/ChangeLog new/youtube-dl/ChangeLog --- old/youtube-dl/ChangeLog 2020-12-01 19:37:37.000000000 +0100 +++ new/youtube-dl/ChangeLog 2020-12-04 21:39:56.000000000 +0100 @@ -1,3 +1,24 @@ +version 2020.12.05 + +Core +* [extractor/common] Improve Akamai HTTP format extraction + * Allow m3u8 manifest without an additional audio format + * Fix extraction for qualities starting with a number + +Extractors +* [teachable:course] Improve extraction (#24507, #27286) +* [nrk] Improve error extraction +* [nrktv:series] Improve extraction (#21926) +* [nrktv:season] Improve extraction +* [nrk] Improve format extraction and geo-restriction detection (#24221) +* [pornhub] Handle HTTP errors gracefully (#26414) +* [nrktv] Relax URL regular expression (#27299, #26185) ++ [zdf] Extract webm formats (#26659) ++ [gamespot] Extract DASH and HTTP formats ++ [tver] Add support for tver.jp (#26662, #27284) ++ [pornhub] Add support for pornhub.org (#27276) + + version 2020.12.02 Extractors diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/docs/supportedsites.md new/youtube-dl/docs/supportedsites.md --- old/youtube-dl/docs/supportedsites.md 2020-12-01 19:37:40.000000000 +0100 +++ new/youtube-dl/docs/supportedsites.md 2020-12-04 21:40:00.000000000 +0100 @@ -308,6 +308,7 @@ - **FrontendMasters** - **FrontendMastersCourse** - **FrontendMastersLesson** + - **FujiTVFODPlus7** - **Funimation** - **Funk** - **Fusion** @@ -710,6 +711,7 @@ - **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:toplist**: QQ音乐 - 排行榜 - **QuantumTV** + - **Qub** - **Quickline** - **QuicklineLive** - **R7** @@ -952,6 +954,7 @@ - **TVANouvellesArticle** - **TVC** - **TVCArticle** + - **TVer** - **tvigle**: Интернет-телевидение Tvigle.ru - **tvland.com** - **TVN24** Binary files old/youtube-dl/youtube-dl and new/youtube-dl/youtube-dl differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/common.py new/youtube-dl/youtube_dl/extractor/common.py --- old/youtube-dl/youtube_dl/extractor/common.py 2020-11-29 07:55:15.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/common.py 2020-12-01 19:40:52.000000000 +0100 @@ -2614,33 +2614,32 @@ hls_host = hosts.get('hls') if hls_host: m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url) - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) + m3u8_id='hls', fatal=False) + formats.extend(m3u8_formats) http_host = hosts.get('http') - if http_host and 'hdnea=' not in manifest_url: - REPL_REGEX = r'https://[^/]+/i/([^,]+),([^/]+),([^/]+).csmil/.+' + if http_host and m3u8_formats and 'hdnea=' not in m3u8_url: + REPL_REGEX = r'https?://[^/]+/i/([^,]+),([^/]+),([^/]+)\.csmil/.+' qualities = re.match(REPL_REGEX, m3u8_url).group(2).split(',') qualities_length = len(qualities) - if len(formats) in (qualities_length + 1, qualities_length * 2 + 1): + if len(m3u8_formats) in (qualities_length, qualities_length + 1): i = 0 - http_formats = [] - for f in formats: - if f['protocol'] == 'm3u8_native' and f['vcodec'] != 'none': + for f in m3u8_formats: + if f['vcodec'] != 'none': for protocol in ('http', 'https'): http_f = f.copy() del http_f['manifest_url'] http_url = re.sub( - REPL_REGEX, protocol + r'://%s/\1%s\3' % (http_host, qualities[i]), f['url']) + REPL_REGEX, protocol + r'://%s/\g<1>%s\3' % (http_host, qualities[i]), f['url']) http_f.update({ 'format_id': http_f['format_id'].replace('hls-', protocol + '-'), 'url': http_url, 'protocol': protocol, }) - http_formats.append(http_f) + formats.append(http_f) i += 1 - formats.extend(http_formats) return formats diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/extractors.py new/youtube-dl/youtube_dl/extractor/extractors.py --- old/youtube-dl/youtube_dl/extractor/extractors.py 2020-11-29 07:55:20.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/extractors.py 2020-12-01 19:40:52.000000000 +0100 @@ -393,6 +393,7 @@ FrontendMastersLessonIE, FrontendMastersCourseIE ) +from .fujitv import FujiTVFODPlus7IE from .funimation import FunimationIE from .funk import FunkIE from .fusion import FusionIE @@ -1221,7 +1222,10 @@ from .tv2hu import TV2HuIE from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE -from .tva import TVAIE +from .tva import ( + TVAIE, + QubIE, +) from .tvanouvelles import ( TVANouvellesIE, TVANouvellesArticleIE, @@ -1230,6 +1234,7 @@ TVCIE, TVCArticleIE, ) +from .tver import TVerIE from .tvigle import TvigleIE from .tvland import TVLandIE from .tvn24 import TVN24IE diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/fujitv.py new/youtube-dl/youtube_dl/extractor/fujitv.py --- old/youtube-dl/youtube_dl/extractor/fujitv.py 1970-01-01 01:00:00.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/fujitv.py 2020-12-01 19:40:52.000000000 +0100 @@ -0,0 +1,35 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor + + +class FujiTVFODPlus7IE(InfoExtractor): + _VALID_URL = r'https?://i\.fod\.fujitv\.co\.jp/plus7/web/[0-9a-z]{4}/(?P<id>[0-9a-z]+)' + _BASE_URL = 'http://i.fod.fujitv.co.jp/' + _BITRATE_MAP = { + 300: (320, 180), + 800: (640, 360), + 1200: (1280, 720), + 2000: (1280, 720), + } + + def _real_extract(self, url): + video_id = self._match_id(url) + formats = self._extract_m3u8_formats( + self._BASE_URL + 'abr/pc_html5/%s.m3u8' % video_id, video_id) + for f in formats: + wh = self._BITRATE_MAP.get(f.get('tbr')) + if wh: + f.update({ + 'width': wh[0], + 'height': wh[1], + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': video_id, + 'formats': formats, + 'thumbnail': self._BASE_URL + 'pc/image/wbtn/wbtn_%s.jpg' % video_id, + } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/gamespot.py new/youtube-dl/youtube_dl/extractor/gamespot.py --- old/youtube-dl/youtube_dl/extractor/gamespot.py 2020-11-29 07:55:15.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/gamespot.py 2020-12-01 19:40:52.000000000 +0100 @@ -1,16 +1,7 @@ from __future__ import unicode_literals -import re - from .once import OnceIE -from ..compat import ( - compat_urllib_parse_unquote, -) -from ..utils import ( - unescapeHTML, - url_basename, - dict_get, -) +from ..compat import compat_urllib_parse_unquote class GameSpotIE(OnceIE): @@ -24,17 +15,16 @@ 'title': 'Arma 3 - Community Guide: SITREP I', 'description': 'Check out this video where some of the basics of Arma 3 is explained.', }, + 'skip': 'manifest URL give HTTP Error 404: Not Found', }, { 'url': 'http://www.gamespot.com/videos/the-witcher-3-wild-hunt-xbox-one-now-playing/2300-6424837/', + 'md5': '173ea87ad762cf5d3bf6163dceb255a6', 'info_dict': { 'id': 'gs-2300-6424837', 'ext': 'mp4', 'title': 'Now Playing - The Witcher 3: Wild Hunt', 'description': 'Join us as we take a look at the early hours of The Witcher 3: Wild Hunt and more.', }, - 'params': { - 'skip_download': True, # m3u8 downloads - }, }, { 'url': 'https://www.gamespot.com/videos/embed/6439218/', 'only_matching': True, @@ -49,90 +39,40 @@ def _real_extract(self, url): page_id = self._match_id(url) webpage = self._download_webpage(url, page_id) - data_video_json = self._search_regex( - r'data-video=["\'](.*?)["\']', webpage, 'data video') - data_video = self._parse_json(unescapeHTML(data_video_json), page_id) + data_video = self._parse_json(self._html_search_regex( + r'data-video=(["\'])({.*?})\1', webpage, + 'video data', group=2), page_id) + title = compat_urllib_parse_unquote(data_video['title']) streams = data_video['videoStreams'] - - manifest_url = None formats = [] - f4m_url = streams.get('f4m_stream') - if f4m_url: - manifest_url = f4m_url - formats.extend(self._extract_f4m_formats( - f4m_url + '?hdcore=3.7.0', page_id, f4m_id='hds', fatal=False)) - m3u8_url = dict_get(streams, ('m3u8_stream', 'adaptive_stream')) + + m3u8_url = streams.get('adaptive_stream') if m3u8_url: - manifest_url = m3u8_url m3u8_formats = self._extract_m3u8_formats( m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) - formats.extend(m3u8_formats) - progressive_url = dict_get( - streams, ('progressive_hd', 'progressive_high', 'progressive_low', 'other_lr')) - if progressive_url and manifest_url: - qualities_basename = self._search_regex( - r'/([^/]+)\.csmil/', - manifest_url, 'qualities basename', default=None) - if qualities_basename: - QUALITIES_RE = r'((,\d+)+,?)' - qualities = self._search_regex( - QUALITIES_RE, qualities_basename, - 'qualities', default=None) - if qualities: - qualities = list(map(lambda q: int(q), qualities.strip(',').split(','))) - qualities.sort() - http_template = re.sub(QUALITIES_RE, r'%d', qualities_basename) - http_url_basename = url_basename(progressive_url) - if m3u8_formats: - self._sort_formats(m3u8_formats) - m3u8_formats = list(filter( - lambda f: f.get('vcodec') != 'none', m3u8_formats)) - if len(qualities) == len(m3u8_formats): - for q, m3u8_format in zip(qualities, m3u8_formats): - f = m3u8_format.copy() - f.update({ - 'url': progressive_url.replace( - http_url_basename, http_template % q), - 'format_id': f['format_id'].replace('hls', 'http'), - 'protocol': 'http', - }) - formats.append(f) - else: - for q in qualities: - formats.append({ - 'url': progressive_url.replace( - http_url_basename, http_template % q), - 'ext': 'mp4', - 'format_id': 'http-%d' % q, - 'tbr': q, - }) - - onceux_json = self._search_regex( - r'data-onceux-options=["\'](.*?)["\']', webpage, 'data video', default=None) - if onceux_json: - onceux_url = self._parse_json(unescapeHTML(onceux_json), page_id).get('metadataUri') - if onceux_url: - formats.extend(self._extract_once_formats(re.sub( - r'https?://[^/]+', 'http://once.unicornmedia.com', onceux_url), - http_formats_preference=-1)) - - if not formats: - for quality in ['sd', 'hd']: - # It's actually a link to a flv file - flv_url = streams.get('f4m_{0}'.format(quality)) - if flv_url is not None: - formats.append({ - 'url': flv_url, - 'ext': 'flv', - 'format_id': quality, - }) + for f in m3u8_formats: + formats.append(f) + http_f = f.copy() + del http_f['manifest_url'] + http_f.update({ + 'format_id': f['format_id'].replace('hls-', 'http-'), + 'protocol': 'http', + 'url': f['url'].replace('.m3u8', '.mp4'), + }) + formats.append(http_f) + + mpd_url = streams.get('adaptive_dash') + if mpd_url: + formats.extend(self._extract_mpd_formats( + mpd_url, page_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) return { - 'id': data_video['guid'], + 'id': data_video.get('guid') or page_id, 'display_id': page_id, - 'title': compat_urllib_parse_unquote(data_video['title']), + 'title': title, 'formats': formats, 'description': self._html_search_meta('description', webpage), 'thumbnail': self._og_search_thumbnail(webpage), diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/nhk.py new/youtube-dl/youtube_dl/extractor/nhk.py --- old/youtube-dl/youtube_dl/extractor/nhk.py 2020-11-29 07:55:15.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/nhk.py 2020-12-01 19:40:52.000000000 +0100 @@ -10,7 +10,7 @@ # Content available only for a limited period of time. Visit # https://www3.nhk.or.jp/nhkworld/en/ondemand/ for working samples. _TESTS = [{ - # clip + # video clip 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', 'md5': '256a1be14f48d960a7e61e2532d95ec3', 'info_dict': { @@ -22,6 +22,19 @@ 'upload_date': '20190816', }, }, { + # audio clip + 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/audio/r_inventions-20201104-1/', + 'info_dict': { + 'id': 'r_inventions-20201104-1-en', + 'ext': 'm4a', + 'title': "Japan's Top Inventions - Miniature Video Cameras", + 'description': 'md5:07ea722bdbbb4936fdd360b6a480c25b', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2015173/', 'only_matching': True, }, { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/nrk.py new/youtube-dl/youtube_dl/extractor/nrk.py --- old/youtube-dl/youtube_dl/extractor/nrk.py 2020-11-29 07:55:15.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/nrk.py 2020-12-01 19:40:52.000000000 +0100 @@ -1,6 +1,7 @@ # coding: utf-8 from __future__ import unicode_literals +import itertools import re from .common import InfoExtractor @@ -17,6 +18,7 @@ parse_age_limit, parse_duration, try_get, + urljoin, url_or_none, ) @@ -24,6 +26,27 @@ class NRKBaseIE(InfoExtractor): _GEO_COUNTRIES = ['NO'] + def _extract_nrk_formats(self, asset_url, video_id): + return self._extract_m3u8_formats( + re.sub(r'(?:bw_(?:low|high)=\d+|no_audio_only)&?', '', asset_url), + video_id, 'mp4', 'm3u8_native', fatal=False) + + def _raise_error(self, data): + MESSAGES = { + 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', + 'ProgramRightsHasExpired': 'Programmet har gått ut', + 'NoProgramRights': 'Ikke tilgjengelig', + 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', + } + message_type = data.get('messageType', '') + # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* + if 'IsGeoBlocked' in message_type or try_get(data, lambda x: x['usageRights']['isGeoBlocked']) is True: + self.raise_geo_restricted( + msg=MESSAGES.get('ProgramIsGeoBlocked'), + countries=self._GEO_COUNTRIES) + message = data.get('endUserMessage') or MESSAGES.get(message_type, message_type) + raise ExtractorError('%s said: %s' % (self.IE_NAME, message), expected=True) + class NRKIE(NRKBaseIE): _VALID_URL = r'''(?x) @@ -82,6 +105,9 @@ 'http://psapi.nrk.no/playback/manifest/%s' % video_id, video_id, 'Downloading manifest JSON') + if manifest.get('playability') == 'nonPlayable': + self._raise_error(manifest['nonPlayable']) + playable = manifest['playable'] formats = [] @@ -94,9 +120,7 @@ if not format_url: continue if asset.get('format') == 'HLS' or determine_ext(format_url) == 'm3u8': - formats.extend(self._extract_m3u8_formats( - format_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='hls', fatal=False)) + formats.extend(self._extract_nrk_formats(format_url, video_id)) self._sort_formats(formats) data = self._download_json( @@ -143,14 +167,7 @@ class NRKTVIE(NRKBaseIE): IE_DESC = 'NRK TV and NRK Radio' _EPISODE_RE = r'(?P<id>[a-zA-Z]{4}\d{8})' - _VALID_URL = r'''(?x) - https?:// - (?:tv|radio)\.nrk(?:super)?\.no/ - (?:serie(?:/[^/]+){1,2}|program)/ - (?![Ee]pisodes)%s - (?:/\d{2}-\d{2}-\d{4})? - (?:\#del=(?P<part_id>\d+))? - ''' % _EPISODE_RE + _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/(?:[^/]+/)*%s' % _EPISODE_RE _API_HOSTS = ('psapi-ne.nrk.no', 'psapi-we.nrk.no') _TESTS = [{ 'url': 'https://tv.nrk.no/program/MDDP12000117', @@ -275,6 +292,9 @@ }, { 'url': 'https://tv.nrk.no/serie/lindmo/2018/MUHU11006318/avspiller', 'only_matching': True, + }, { + 'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201507/NPUB21019315', + 'only_matching': True, }] _api_host = None @@ -295,6 +315,7 @@ title = data.get('fullTitle') or data.get('mainTitle') or data['title'] video_id = data.get('id') or video_id + urls = [] entries = [] conviva = data.get('convivaStatistics') or {} @@ -311,19 +332,13 @@ else ('%s-%d' % (video_id, idx), '%s (Part %d)' % (title, idx))) for num, asset in enumerate(media_assets, 1): asset_url = asset.get('url') - if not asset_url: + if not asset_url or asset_url in urls: continue - formats = self._extract_akamai_formats(asset_url, video_id) + formats = self._extract_nrk_formats(asset_url, video_id) if not formats: continue self._sort_formats(formats) - # Some f4m streams may not work with hdcore in fragments' URLs - for f in formats: - extra_param = f.get('extra_param_to_segment_url') - if extra_param and 'hdcore' in extra_param: - del f['extra_param_to_segment_url'] - entry_id, entry_title = video_id_and_title(num) duration = parse_duration(asset.get('duration')) subtitles = {} @@ -343,34 +358,20 @@ if not entries: media_url = data.get('mediaUrl') - if media_url: - formats = self._extract_akamai_formats(media_url, video_id) - self._sort_formats(formats) - duration = parse_duration(data.get('duration')) - entries = [{ - 'id': video_id, - 'title': make_title(title), - 'duration': duration, - 'formats': formats, - }] + if media_url and media_url not in urls: + formats = self._extract_nrk_formats(media_url, video_id) + if formats: + self._sort_formats(formats) + duration = parse_duration(data.get('duration')) + entries = [{ + 'id': video_id, + 'title': make_title(title), + 'duration': duration, + 'formats': formats, + }] if not entries: - MESSAGES = { - 'ProgramRightsAreNotReady': 'Du kan dessverre ikke se eller høre programmet', - 'ProgramRightsHasExpired': 'Programmet har gått ut', - 'NoProgramRights': 'Ikke tilgjengelig', - 'ProgramIsGeoBlocked': 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', - } - message_type = data.get('messageType', '') - # Can be ProgramIsGeoBlocked or ChannelIsGeoBlocked* - if 'IsGeoBlocked' in message_type: - self.raise_geo_restricted( - msg=MESSAGES.get('ProgramIsGeoBlocked'), - countries=self._GEO_COUNTRIES) - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, MESSAGES.get( - message_type, message_type)), - expected=True) + self._raise_error(data) series = conviva.get('seriesName') or data.get('seriesTitle') episode = conviva.get('episodeName') or data.get('episodeNumberOrDate') @@ -517,7 +518,8 @@ config = self._parse_json( self._search_regex( (r'INITIAL_DATA(?:_V\d)?_*\s*=\s*({.+?})\s*;', - r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>'), + r'({.+?})\s*,\s*"[^"]+"\s*\)\s*</script>', + r'PRELOADED_STATE_*\s*=\s*({.+?})\s*\n'), webpage, 'config', default='{}' if not fatal else NO_DEFAULT), display_id, fatal=False, transform_source=js_to_json) if not config: @@ -527,12 +529,26 @@ (lambda x: x['initialState']['series'], lambda x: x['series']), dict) - def _extract_seasons(self, seasons): + def _extract_seasons(self, domain, series_id, seasons): + if isinstance(seasons, dict): + seasons = seasons.get('seasons') if not isinstance(seasons, list): return [] entries = [] for season in seasons: - entries.extend(self._extract_episodes(season)) + if not isinstance(season, dict): + continue + episodes = self._extract_episodes(season) + if episodes: + entries.extend(episodes) + continue + season_name = season.get('name') + if season_name and isinstance(season_name, compat_str): + entries.append(self.url_result( + 'https://%s.nrk.no/serie/%s/sesong/%s' + % (domain, series_id, season_name), + ie=NRKTVSeasonIE.ie_key(), + video_title=season.get('title'))) return entries def _extract_episodes(self, season): @@ -545,24 +561,94 @@ return [] entries = [] for episode in entry_list: - nrk_id = episode.get('prfId') + nrk_id = episode.get('prfId') or episode.get('episodeId') if not nrk_id or not isinstance(nrk_id, compat_str): continue + if not re.match(NRKTVIE._EPISODE_RE, nrk_id): + continue entries.append(self.url_result( 'nrk:%s' % nrk_id, ie=NRKIE.ie_key(), video_id=nrk_id)) return entries + _ASSETS_KEYS = ('episodes', 'instalments',) + + def _extract_assets_key(self, embedded): + for asset_key in self._ASSETS_KEYS: + if embedded.get(asset_key): + return asset_key + + def _entries(self, data, display_id): + for page_num in itertools.count(1): + embedded = data.get('_embedded') + if not isinstance(embedded, dict): + break + assets_key = self._extract_assets_key(embedded) + if not assets_key: + break + # Extract entries + entries = try_get( + embedded, + (lambda x: x[assets_key]['_embedded'][assets_key], + lambda x: x[assets_key]), + list) + for e in self._extract_entries(entries): + yield e + # Find next URL + next_url = urljoin( + 'https://psapi.nrk.no/', + try_get( + data, + (lambda x: x['_links']['next']['href'], + lambda x: x['_embedded'][assets_key]['_links']['next']['href']), + compat_str)) + if not next_url: + break + data = self._download_json( + next_url, display_id, + 'Downloading %s JSON page %d' % (assets_key, page_num), + fatal=False) + if not data: + break + class NRKTVSeasonIE(NRKTVSerieBaseIE): - _VALID_URL = r'https?://tv\.nrk\.no/serie/[^/]+/sesong/(?P<id>\d+)' - _TEST = { + _VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk\.no/serie/(?P<serie>[^/]+)/(?:sesong/)?(?P<id>\d+)' + _TESTS = [{ 'url': 'https://tv.nrk.no/serie/backstage/sesong/1', 'info_dict': { - 'id': '1', + 'id': 'backstage/1', 'title': 'Sesong 1', }, 'playlist_mincount': 30, - } + }, { + # no /sesong/ in path + 'url': 'https://tv.nrk.no/serie/lindmo/2016', + 'info_dict': { + 'id': 'lindmo/2016', + 'title': '2016', + }, + 'playlist_mincount': 29, + }, { + # weird nested _embedded in catalog JSON response + 'url': 'https://radio.nrk.no/serie/dickie-dick-dickens/sesong/1', + 'info_dict': { + 'id': 'dickie-dick-dickens/1', + 'title': 'Sesong 1', + }, + 'playlist_mincount': 11, + }, { + # 841 entries, multi page + 'url': 'https://radio.nrk.no/serie/dagsnytt/sesong/201509', + 'info_dict': { + 'id': 'dagsnytt/201509', + 'title': 'September 2015', + }, + 'playlist_mincount': 841, + }, { + # 180 entries, single page + 'url': 'https://tv.nrk.no/serie/spangas/sesong/1', + 'only_matching': True, + }] @classmethod def suitable(cls, url): @@ -570,25 +656,39 @@ else super(NRKTVSeasonIE, cls).suitable(url)) def _real_extract(self, url): - display_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + domain = mobj.group('domain') + serie = mobj.group('serie') + season_id = mobj.group('id') + display_id = '%s/%s' % (serie, season_id) - webpage = self._download_webpage(url, display_id) - - series = self._extract_series(webpage, display_id) - - season = next( - s for s in series['seasons'] - if int(display_id) == s.get('seasonNumber')) + data = self._download_json( + 'https://psapi.nrk.no/%s/catalog/series/%s/seasons/%s' + % (domain, serie, season_id), display_id, query={'pageSize': 50}) - title = try_get(season, lambda x: x['titles']['title'], compat_str) + title = try_get(data, lambda x: x['titles']['title'], compat_str) or display_id return self.playlist_result( - self._extract_episodes(season), display_id, title) + self._entries(data, display_id), + display_id, title) class NRKTVSeriesIE(NRKTVSerieBaseIE): - _VALID_URL = r'https?://(?:tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)' + _VALID_URL = r'https?://(?P<domain>tv|radio)\.nrk(?:super)?\.no/serie/(?P<id>[^/]+)' _ITEM_RE = r'(?:data-season=["\']|id=["\']season-)(?P<id>\d+)' _TESTS = [{ + # new layout, instalments + 'url': 'https://tv.nrk.no/serie/groenn-glede', + 'info_dict': { + 'id': 'groenn-glede', + 'title': 'Grønn glede', + 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608', + }, + 'playlist_mincount': 90, + }, { + # new layout, instalments, more entries + 'url': 'https://tv.nrk.no/serie/lindmo', + 'only_matching': True, + }, { 'url': 'https://tv.nrk.no/serie/blank', 'info_dict': { 'id': 'blank', @@ -596,24 +696,17 @@ 'description': 'md5:7664b4e7e77dc6810cd3bca367c25b6e', }, 'playlist_mincount': 30, + 'expected_warnings': ['HTTP Error 404: Not Found'], }, { # new layout, seasons 'url': 'https://tv.nrk.no/serie/backstage', 'info_dict': { 'id': 'backstage', 'title': 'Backstage', - 'description': 'md5:c3ec3a35736fca0f9e1207b5511143d3', + 'description': 'md5:63692ceb96813d9a207e9910483d948b', }, 'playlist_mincount': 60, - }, { - # new layout, instalments - 'url': 'https://tv.nrk.no/serie/groenn-glede', - 'info_dict': { - 'id': 'groenn-glede', - 'title': 'Grønn glede', - 'description': 'md5:7576e92ae7f65da6993cf90ee29e4608', - }, - 'playlist_mincount': 10, + 'expected_warnings': ['HTTP Error 404: Not Found'], }, { # old layout 'url': 'https://tv.nrksuper.no/serie/labyrint', @@ -632,6 +725,13 @@ }, { 'url': 'https://tv.nrk.no/serie/postmann-pat', 'only_matching': True, + }, { + 'url': 'https://radio.nrk.no/serie/dickie-dick-dickens', + 'info_dict': { + 'id': 'dickie-dick-dickens', + }, + 'playlist_mincount': 8, + 'expected_warnings': ['HTTP Error 404: Not Found'], }] @classmethod @@ -642,18 +742,32 @@ else super(NRKTVSeriesIE, cls).suitable(url)) def _real_extract(self, url): - series_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + domain = mobj.group('domain') + series_id = mobj.group('id') + + title = description = None webpage = self._download_webpage(url, series_id) - # New layout (e.g. https://tv.nrk.no/serie/backstage) series = self._extract_series(webpage, series_id, fatal=False) if series: title = try_get(series, lambda x: x['titles']['title'], compat_str) description = try_get( series, lambda x: x['titles']['subtitle'], compat_str) + + data = self._download_json( + 'https://psapi.nrk.no/%s/catalog/series/%s/instalments' + % (domain, series_id), series_id, query={'pageSize': 50}, + fatal=False) + if data: + return self.playlist_result( + self._entries(data, series_id), series_id, title, description) + + # New layout (e.g. https://tv.nrk.no/serie/backstage) + if series: entries = [] - entries.extend(self._extract_seasons(series.get('seasons'))) + entries.extend(self._extract_seasons(domain, series_id, series.get('seasons'))) entries.extend(self._extract_entries(series.get('instalments'))) entries.extend(self._extract_episodes(series.get('extraMaterial'))) return self.playlist_result(entries, series_id, title, description) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/pornhub.py new/youtube-dl/youtube_dl/extractor/pornhub.py --- old/youtube-dl/youtube_dl/extractor/pornhub.py 2020-11-29 07:55:15.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/pornhub.py 2020-12-01 19:40:52.000000000 +0100 @@ -31,7 +31,12 @@ def dl(*args, **kwargs): return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs) - webpage, urlh = dl(*args, **kwargs) + ret = dl(*args, **kwargs) + + if not ret: + return ret + + webpage, urlh = ret if any(re.search(p, webpage) for p in ( r'<body\b[^>]+\bonload=["\']go\(\)', @@ -53,7 +58,7 @@ _VALID_URL = r'''(?x) https?:// (?: - (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| + (?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:view_video\.php|video/show)\?viewkey=|embed/)| (?:www\.)?thumbzilla\.com/video/ ) (?P<id>[\da-z]+) @@ -153,6 +158,9 @@ 'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933', 'only_matching': True, }, { + 'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933', + 'only_matching': True, + }, { 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82', 'only_matching': True, }] @@ -160,7 +168,7 @@ @staticmethod def _extract_urls(webpage): return re.findall( - r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net)/embed/[\da-z]+)', + r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub\.(?:com|net|org)/embed/[\da-z]+)', webpage) def _extract_count(self, pattern, webpage, name): @@ -422,7 +430,7 @@ class PornHubUserIE(PornHubPlaylistBaseIE): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, @@ -490,7 +498,7 @@ class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?P<id>(?:[^/]+/)*[^/?#&]+)' + _VALID_URL = r'https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?P<id>(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, @@ -605,7 +613,7 @@ class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): - _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' + _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?(?P<host>pornhub(?:premium)?\.(?:com|net|org))/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' _TESTS = [{ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'info_dict': { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/teachable.py new/youtube-dl/youtube_dl/extractor/teachable.py --- old/youtube-dl/youtube_dl/extractor/teachable.py 2020-11-29 07:55:15.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/teachable.py 2020-12-01 19:40:52.000000000 +0100 @@ -269,7 +269,7 @@ r'(?s)(?P<li><li[^>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)', webpage): li = mobj.group('li') - if 'fa-youtube-play' not in li: + if 'fa-youtube-play' not in li and not re.search(r'\d{1,2}:\d{2}', li): continue lecture_url = self._search_regex( r'<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', li, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/tver.py new/youtube-dl/youtube_dl/extractor/tver.py --- old/youtube-dl/youtube_dl/extractor/tver.py 1970-01-01 01:00:00.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/tver.py 2020-12-01 19:40:52.000000000 +0100 @@ -0,0 +1,67 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..compat import compat_str +from ..utils import ( + int_or_none, + remove_start, + smuggle_url, + try_get, +) + + +class TVerIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?tver\.jp/(?P<path>(?:corner|episode|feature)/(?P<id>f?\d+))' + # videos are only available for 7 days + _TESTS = [{ + 'url': 'https://tver.jp/corner/f0062178', + 'only_matching': True, + }, { + 'url': 'https://tver.jp/feature/f0062413', + 'only_matching': True, + }, { + 'url': 'https://tver.jp/episode/79622438', + 'only_matching': True, + }] + _TOKEN = None + BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s' + + def _real_initialize(self): + self._TOKEN = self._download_json( + 'https://tver.jp/api/access_token.php', None)['token'] + + def _real_extract(self, url): + path, video_id = re.match(self._VALID_URL, url).groups() + main = self._download_json( + 'https://api.tver.jp/v4/' + path, video_id, + query={'token': self._TOKEN})['main'] + p_id = main['publisher_id'] + service = remove_start(main['service'], 'ts_') + info = { + '_type': 'url_transparent', + 'description': try_get(main, lambda x: x['note'][0]['text'], compat_str), + 'episode_number': int_or_none(try_get(main, lambda x: x['ext']['episode_number'])), + } + + if service == 'cx': + info.update({ + 'title': main.get('subtitle') or main['title'], + 'url': 'https://i.fod.fujitv.co.jp/plus7/web/%s/%s.html' % (p_id[:4], p_id), + 'ie_key': 'FujiTVFODPlus7', + }) + else: + r_id = main['reference_id'] + if service not in ('tx', 'russia2018', 'sebare2018live', 'gorin'): + r_id = 'ref:' + r_id + bc_url = smuggle_url( + self.BRIGHTCOVE_URL_TEMPLATE % (p_id, r_id), + {'geo_countries': ['JP']}) + info.update({ + 'url': bc_url, + 'ie_key': 'BrightcoveNew', + }) + + return info diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/extractor/zdf.py new/youtube-dl/youtube_dl/extractor/zdf.py --- old/youtube-dl/youtube_dl/extractor/zdf.py 2020-11-29 07:55:15.000000000 +0100 +++ new/youtube-dl/youtube_dl/extractor/zdf.py 2020-12-01 19:40:52.000000000 +0100 @@ -40,7 +40,7 @@ class ZDFIE(ZDFBaseIE): _VALID_URL = r'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?]+)\.html' - _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh') + _QUALITIES = ('auto', 'low', 'med', 'high', 'veryhigh', 'hd') _GEO_COUNTRIES = ['DE'] _TESTS = [{ @@ -119,7 +119,7 @@ if not ptmd_path: ptmd_path = t[ 'http://zdf.de/rels/streams/ptmd-template'].replace( - '{playerId}', 'portal') + '{playerId}', 'ngplayer_2_4') ptmd = self._call_api( urljoin(url, ptmd_path), player, url, video_id, 'metadata') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/youtube-dl/youtube_dl/version.py new/youtube-dl/youtube_dl/version.py --- old/youtube-dl/youtube_dl/version.py 2020-12-01 19:37:37.000000000 +0100 +++ new/youtube-dl/youtube_dl/version.py 2020-12-04 21:39:56.000000000 +0100 @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = '2020.12.02' +__version__ = '2020.12.05' _______________________________________________ openSUSE Commits mailing list -- [email protected] To unsubscribe, email [email protected] List Netiquette: https://en.opensuse.org/openSUSE:Mailing_list_netiquette List Archives: https://lists.opensuse.org/archives/list/[email protected]
