Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package youtube-dl for openSUSE:Factory checked in at 2022-04-08 00:28:04 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/youtube-dl (Old) and /work/SRC/openSUSE:Factory/.youtube-dl.new.1900 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "youtube-dl" Fri Apr 8 00:28:04 2022 rev:174 rq:967471 version:2021.12.17 Changes: -------- --- /work/SRC/openSUSE:Factory/youtube-dl/python-youtube-dl.changes 2021-12-29 21:11:22.550306204 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.1900/python-youtube-dl.changes 2022-04-08 00:28:41.657664683 +0200 @@ -1,0 +2,16 @@ +Wed Apr 6 07:11:23 UTC 2022 - Matej Cepl <mc...@suse.com> + +- Add 30713-new-ceskatelevize.patch fixing download + from the redesigned https://www.ceskatelevize.cz/ + (gh#ytdl-org/youtube-dl#30713). +- Add no-pandoc-32bit.patch patching out the need for pandoc + (which is not available on all platforms). + +------------------------------------------------------------------- +Fri Feb 25 13:06:07 UTC 2022 - Matej Cepl <mc...@suse.com> + +- Fish completions should be installed into + /usr/share/fish/vendor_completions.d, where it doesn't conflict + with the fish's own ones (gh#pyenv/pyenv#1944). + +------------------------------------------------------------------- --- /work/SRC/openSUSE:Factory/youtube-dl/youtube-dl.changes 2022-02-26 17:02:57.759544286 +0100 +++ /work/SRC/openSUSE:Factory/.youtube-dl.new.1900/youtube-dl.changes 2022-04-08 00:28:41.681664412 +0200 @@ -1,0 +2,9 @@ +Wed Apr 6 07:11:23 UTC 2022 - Matej Cepl <mc...@suse.com> + +- Add 30713-new-ceskatelevize.patch fixing download + from the redesigned https://www.ceskatelevize.cz/ + (gh#ytdl-org/youtube-dl#30713). +- Add no-pandoc-32bit.patch patching out the need for pandoc + (which is not available on all platforms). 
+ +------------------------------------------------------------------- New: ---- 30713-new-ceskatelevize.patch no-pandoc-32bit.patch youtube-dl.1.temp.rst ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ youtube-dl.spec ++++++ --- /var/tmp/diff_new_pack.tMaTCQ/_old 2022-04-08 00:28:42.753652325 +0200 +++ /var/tmp/diff_new_pack.tMaTCQ/_new 2022-04-08 00:28:42.765652190 +0200 @@ -27,9 +27,23 @@ Source: https://yt-dl.org/downloads/%version/%name-%version.tar.gz Source2: https://yt-dl.org/downloads/%version/%name-%version.tar.gz.sig Source3: %name.keyring +# Generate with +# python3 devscripts/prepare_manpage.py youtube-dl.1.temp.md +# pandoc -f markdown -t rst -o youtube-dl.1.temp.rst youtube-dl.1.temp.md +# and hand editing until rst2man youtube-dl.1.temp.rst youtube.1 runs +Source4: youtube-dl.1.temp.rst +# PATCH-FIX-UPSTREAM 30713-new-ceskatelevize.patch gh#ytdl-org/youtube-dl#30713 mc...@suse.com +# Rewrite of the support for https://www.ceskatelevize.cz/ +Patch0: 30713-new-ceskatelevize.patch +# PATCH-FEATURE-OPENSUSE no-pandoc-32bit.patch mc...@suse.com +# 32bit architectures don't have pandoc +Patch1: no-pandoc-32bit.patch BuildRequires: make >= 4 BuildRequires: python3-devel BuildRequires: python3-xml +# For documentation +BuildRequires: python3-docutils +BuildRequires: python3-Pygments BuildRequires: zip Requires: ffmpeg Requires: python3 @@ -73,6 +87,8 @@ %prep %autosetup -p1 -n %name +cp --no-preserve=timestamps %{SOURCE4} . 
+ %build rm -f youtube-dl PYTHON="%_bindir/python3" %make_build ++++++ 30713-new-ceskatelevize.patch ++++++ >From d799c405010e50f75e253ace218de9c4be8744a9 Mon Sep 17 00:00:00 2001 From: dirkf <fieldho...@gmx.net> Date: Tue, 1 Mar 2022 08:11:50 +0000 Subject: [PATCH] [extractor/ceskatelevize] Back-port extractor from yt-dlp * back-port extractor with traverse_obj() shim, removing CeskaTelevizePoradyIE * follow redirect URL * support liveBroadcast and videobonusDetail in __NEXT__ data * id must be compat_str * return single video for singleton playlist * fix/add tests --- youtube_dl/compat.py | 4 youtube_dl/extractor/ceskatelevize.py | 277 ++++++++++++++++++++++++---------- youtube_dl/extractor/extractors.py | 5 3 files changed, 203 insertions(+), 83 deletions(-) --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -21,6 +21,10 @@ import subprocess import sys import xml.etree.ElementTree +try: + import collections.abc as compat_collections_abc +except ImportError: + import collections as compat_collections_abc try: import urllib.request as compat_urllib_request --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -12,70 +12,243 @@ from ..utils import ( ExtractorError, float_or_none, sanitized_Request, - unescapeHTML, - update_url_query, + str_or_none, urlencode_postdata, USER_AGENTS, ) +try: + from ..utils import traverse_obj +except ImportError: + from ..compat import (compat_str, compat_collections_abc) + from ..utils import int_or_none + + def traverse_obj(obj, *path_list, **kw): + ''' Traverse nested list/dict/tuple''' + + # parameter defaults + default = kw.get('default') + expected_type = kw.get('expected_type') + get_all = kw.get('get_all', True) + casesense = kw.get('casesense', True) + is_user_input = kw.get('is_user_input', False) + traverse_string = kw.get('traverse_string', False) + + def variadic(x, allowed_types=(compat_str, bytes)): + return x if isinstance(x, compat_collections_abc.Iterable) and not 
isinstance(x, allowed_types) else (x,) + + def listish(l): + return isinstance(l, (list, tuple)) + + def from_iterable(iterables): + # chain.from_iterable(['ABC', 'DEF']) --> A B C D E F + for it in iterables: + for element in it: + yield element + + class Nonlocal: + pass + nl = Nonlocal() + + if not casesense: + _lower = lambda k: (k.lower() if isinstance(k, compat_str) else k) + path_list = (map(_lower, variadic(path)) for path in path_list) + + def _traverse_obj(obj, path, _current_depth=0): + path = tuple(variadic(path)) + for i, key in enumerate(path): + if obj is None: + return None + if listish(key): + obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key] + key = Ellipsis + if key is Ellipsis: + obj = (obj.values() if isinstance(obj, dict) + else obj if listish(obj) + else compat_str(obj) if traverse_string else []) + _current_depth += 1 + nl.depth = max(nl.depth, _current_depth) + return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj] + elif callable(key): + if listish(obj): + obj = enumerate(obj) + elif isinstance(obj, dict): + obj = obj.items() + else: + if not traverse_string: + return None + obj = str(obj) + _current_depth += 1 + nl.depth = max(nl.depth, _current_depth) + return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)] + elif isinstance(obj, dict) and not (is_user_input and key == ':'): + obj = (obj.get(key) if casesense or (key in obj) + else next((v for k, v in obj.items() if _lower(k) == key), None)) + else: + if is_user_input: + key = (int_or_none(key) if ':' not in key + else slice(*map(int_or_none, key.split(':')))) + if key == slice(None): + return _traverse_obj(obj, tuple([Ellipsis] + list(path[i + 1:])), _current_depth) + if not isinstance(key, (int, slice)): + return None + if not listish(obj): + if not traverse_string: + return None + obj = compat_str(obj) + try: + obj = obj[key] + except IndexError: + return None + return obj + + if isinstance(expected_type, 
type): + type_test = lambda val: val if isinstance(val, expected_type) else None + elif expected_type is not None: + type_test = expected_type + else: + type_test = lambda val: val + + for path in path_list: + nl.depth = 0 + val = _traverse_obj(obj, path) + if val is not None: + if nl.depth: + for _ in range(nl.depth - 1): + val = from_iterable(v for v in val if v is not None) + val = [v for v in map(type_test, val) if v is not None] + if val: + return val if get_all else val[0] + else: + val = type_test(val) + if val is not None: + return val + return default + class CeskaTelevizeIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/ivysilani/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' + _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/(?:ivysilani|porady|zive)/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' _TESTS = [{ - 'url': 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220', + 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', 'info_dict': { - 'id': '61924494877246241', + 'id': '61924494877028507', 'ext': 'mp4', - 'title': 'Hyde Park Civilizace: ??ivot v Gr??nsku', - 'description': 'md5:3fec8f6bb497be5cdb0c9e8781076626', + 'title': 'Bonus 01 - En - Hyde Park Civilizace', + 'description': 'English Subtittles', 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 3350, + 'duration': 81.3, }, 'params': { # m3u8 download 'skip_download': True, }, }, { - 'url': 'http://www.ceskatelevize.cz/ivysilani/10441294653-hyde-park-civilizace/215411058090502/bonus/20641-bonus-01-en', + # live stream + 'url': 'http://www.ceskatelevize.cz/zive/ct1/', 'info_dict': { - 'id': '61924494877028507', + 'id': '102', 'ext': 'mp4', - 'title': 'Hyde Park Civilizace: Bonus 01 - En', - 'description': 'English Subtittles', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 81.3, + 'title': r'??T1 - ??iv?? vys??l??n?? online', + 'description': 'Sledujte ??iv?? vys??l??n?? 
kanálu ČT1 online. Vybírat si můžete i z dalších kanálů České televize na kterémkoli z vašich zařízení.', + 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, }, { - # live stream + # another 'url': 'http://www.ceskatelevize.cz/ivysilani/zive/ct4/', + 'only_matching': True, 'info_dict': { 'id': 402, 'ext': 'mp4', 'title': r're:^ČT Sport \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'is_live': True, }, + # 'skip': 'Georestricted to Czech Republic', + }, { + 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', + 'only_matching': True, + }, { + # video with 18+ caution trailer + 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', + 'info_dict': { + 'id': '215562210900007-bogotart', + 'title': 'Bogotart - Queer', + 'description': 'Hlavní město Kolumbie v doprovodu queer umělců. Vroucí svět plný vášně, sebevědomí, ale i násilí a bolesti', + }, + 'playlist': [{ + 'info_dict': { + 'id': '61924494877311053', + 'ext': 'mp4', + 'title': 'Bogotart - Queer (Varování 
18+)', + 'duration': 11.9, + }, + }, { + 'info_dict': { + 'id': '61924494877068022', + 'ext': 'mp4', + 'title': 'Bogotart - Queer (Queer)', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 1558.3, + }, + }], 'params': { # m3u8 download 'skip_download': True, }, - 'skip': 'Georestricted to Czech Republic', }, { - 'url': 'http://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php?hash=d6a3e1370d2e4fa76296b90bad4dfc19673b641e&IDEC=217 562 22150/0004&channelID=1&width=100%25', + # iframe embed + 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', 'only_matching': True, }] + def _search_nextjs_data(self, webpage, video_id, **kw): + return self._parse_json( + self._search_regex( + r'(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>', + webpage, 'next.js data', **kw), + video_id, **kw) + def _real_extract(self, url): playlist_id = self._match_id(url) - - webpage = self._download_webpage(url, playlist_id) + webpage, urlh = self._download_webpage_handle(url, playlist_id) + parsed_url = compat_urllib_parse_urlparse(urlh.geturl()) + site_name = self._og_search_property('site_name', webpage, fatal=False, default='??esk?? 
televize') + playlist_title = self._og_search_title(webpage, default=None) + if site_name and playlist_title: + playlist_title = re.split(r'\s*[???|]\s*%s' % (site_name, ), playlist_title, 1)[0] + playlist_description = self._og_search_description(webpage, default=None) + if playlist_description: + playlist_description = playlist_description.replace('\xa0', ' ') + + type_ = 'IDEC' + if re.search(r'(^/porady|/zive)/', parsed_url.path): + next_data = self._search_nextjs_data(webpage, playlist_id) + if '/zive/' in parsed_url.path: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'liveBroadcast', 'current', 'idec'), get_all=False) + else: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', ('show', 'mediaMeta'), 'idec'), get_all=False) + if not idec: + idec = traverse_obj(next_data, ('props', 'pageProps', 'data', 'videobonusDetail', 'bonusId'), get_all=False) + if idec: + type_ = 'bonus' + if not idec: + raise ExtractorError('Failed to find IDEC id') + iframe_hash = self._download_webpage( + 'https://www.ceskatelevize.cz/v-api/iframe-hash/', + playlist_id, note='Getting IFRAME hash') + query = {'hash': iframe_hash, 'origin': 'iVysilani', 'autoStart': 'true', type_: idec, } + webpage = self._download_webpage( + 'https://www.ceskatelevize.cz/ivysilani/embed/iFramePlayer.php', + playlist_id, note='Downloading player', query=query) NOT_AVAILABLE_STRING = 'This content is not available at your territory due to limited copyright.' if '%s</p>' % NOT_AVAILABLE_STRING in webpage: - raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) + self.raise_geo_restricted(NOT_AVAILABLE_STRING) + if any(not_found in webpage for not_found in ('Neplatn?? 
parametr pro videop??ehr??va??', 'IDEC nebyl nalezen', )): + raise ExtractorError('no video with IDEC available', video_id=idec, expected=True) type_ = None episode_id = None @@ -100,7 +273,7 @@ class CeskaTelevizeIE(InfoExtractor): data = { 'playlist[0][type]': type_, 'playlist[0][id]': episode_id, - 'requestUrl': compat_urllib_parse_urlparse(url).path, + 'requestUrl': parsed_url.path, 'requestSource': 'iVysilani', } @@ -108,7 +281,7 @@ class CeskaTelevizeIE(InfoExtractor): for user_agent in (None, USER_AGENTS['Safari']): req = sanitized_Request( - 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist', + 'https://www.ceskatelevize.cz/ivysilani/ajax/get-client-playlist/', data=urlencode_postdata(data)) req.add_header('Content-type', 'application/x-www-form-urlencoded') @@ -130,9 +303,6 @@ class CeskaTelevizeIE(InfoExtractor): req = sanitized_Request(compat_urllib_parse_unquote(playlist_url)) req.add_header('Referer', url) - playlist_title = self._og_search_title(webpage, default=None) - playlist_description = self._og_search_description(webpage, default=None) - playlist = self._download_json(req, playlist_id, fatal=False) if not playlist: continue @@ -167,7 +337,7 @@ class CeskaTelevizeIE(InfoExtractor): entries[num]['formats'].extend(formats) continue - item_id = item.get('id') or item['assetId'] + item_id = str_or_none(item.get('id') or item['assetId']) title = item['title'] duration = float_or_none(item.get('duration')) @@ -181,8 +351,6 @@ class CeskaTelevizeIE(InfoExtractor): if playlist_len == 1: final_title = playlist_title or title - if is_live: - final_title = self._live_title(final_title) else: final_title = '%s (%s)' % (playlist_title, title) @@ -200,6 +368,8 @@ class CeskaTelevizeIE(InfoExtractor): for e in entries: self._sort_formats(e['formats']) + if len(entries) == 1: + return entries[0] return self.playlist_result(entries, playlist_id, playlist_title, playlist_description) def _get_subtitles(self, episode_id, subs): @@ -236,54 +406,3 @@ 
class CeskaTelevizeIE(InfoExtractor): yield line return '\r\n'.join(_fix_subtitle(subtitles)) - - -class CeskaTelevizePoradyIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?ceskatelevize\.cz/porady/(?:[^/?#&]+/)*(?P<id>[^/#?]+)' - _TESTS = [{ - # video with 18+ caution trailer - 'url': 'http://www.ceskatelevize.cz/porady/10520528904-queer/215562210900007-bogotart/', - 'info_dict': { - 'id': '215562210900007-bogotart', - 'title': 'Queer: Bogotart', - 'description': 'Alternativní průvodce současným queer světem', - }, - 'playlist': [{ - 'info_dict': { - 'id': '61924494876844842', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Varování 18+)', - 'duration': 10.2, - }, - }, { - 'info_dict': { - 'id': '61924494877068022', - 'ext': 'mp4', - 'title': 'Queer: Bogotart (Queer)', - 'thumbnail': r're:^https?://.*\.jpg', - 'duration': 1558.3, - }, - }], - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, { - # iframe embed - 'url': 'http://www.ceskatelevize.cz/porady/10614999031-neviditelni/21251212048/', - 'only_matching': True, - }] - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage(url, video_id) - - data_url = update_url_query(unescapeHTML(self._search_regex( - (r'<span[^>]*\bdata-url=(["\'])(?P<url>(?:(?!\1).)+)\1', - r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//(?:www\.)?ceskatelevize\.cz/ivysilani/embed/iFramePlayer\.php.*?)\1'), - webpage, 'iframe player url', group='url')), query={ - 'autoStart': 'true', - }) - - return self.url_result(data_url, ie=CeskaTelevizeIE.ie_key()) --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -202,10 +202,7 @@ from .ccc import ( from .ccma import CCMAIE from .cctv import CCTVIE from .cda import CDAIE -from .ceskatelevize import ( - CeskaTelevizeIE, - CeskaTelevizePoradyIE, -) +from .ceskatelevize import CeskaTelevizeIE from .channel9 import Channel9IE from .charlierose import CharlieRoseIE from .chaturbate import 
ChaturbateIE ++++++ no-pandoc-32bit.patch ++++++ --- Makefile | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) --- a/Makefile +++ b/Makefile @@ -14,9 +14,6 @@ PYTHON ?= /usr/bin/env python # set SYSCONFDIR to /etc if PREFIX=/usr or PREFIX=/usr/local SYSCONFDIR = $(shell if [ $(PREFIX) = /usr -o $(PREFIX) = /usr/local ]; then echo /etc; else echo $(PREFIX)/etc; fi) -# set markdown input format to "markdown-smart" for pandoc version 2 and to "markdown" for pandoc prior to version 2 -MARKDOWN = $(shell if [ `pandoc -v | head -n1 | cut -d" " -f2 | head -c1` = "2" ]; then echo markdown-smart; else echo markdown; fi) - install: youtube-dl youtube-dl.1 youtube-dl.bash-completion youtube-dl.zsh youtube-dl.fish install -d $(DESTDIR)$(BINDIR) install -m 755 youtube-dl $(DESTDIR)$(BINDIR) @@ -89,12 +86,11 @@ supportedsites: $(PYTHON) devscripts/make_supportedsites.py docs/supportedsites.md README.txt: README.md - pandoc -f $(MARKDOWN) -t plain README.md -o README.txt + cp README.md README.txt -youtube-dl.1: README.md - $(PYTHON) devscripts/prepare_manpage.py youtube-dl.1.temp.md - pandoc -s -f $(MARKDOWN) -t man youtube-dl.1.temp.md -o youtube-dl.1 - rm -f youtube-dl.1.temp.md +youtube-dl.1: youtube-dl.1.temp.rst + rst2man youtube-dl.1.temp.rst youtube-dl.1 + rm -f youtube-dl.1.temp.rst youtube-dl.bash-completion: youtube_dl/*.py youtube_dl/*/*.py devscripts/bash-completion.in $(PYTHON) devscripts/bash-completion.py ++++++ youtube-dl.1.temp.rst ++++++ ++++ 2224 lines (skipped)